From 7475a207009727db6b8b7a7fb23327a3b2a18e3b Mon Sep 17 00:00:00 2001 From: Oliver Stanley Date: Thu, 20 Mar 2025 09:27:03 +0000 Subject: [PATCH] include ranges rather than sampled values in difficulty metadata dicts (#387) * update difficulty metadata for logic datasets * update difficulty metadata for graph datasets * update difficulty metadata for geometry datasets * update difficulty metadata for games datasets * update difficulty metadata for cognition datasets * update difficulty metadata for arithmetic datasets * update difficulty metadata for arc datasets * update difficulty metadata for algorithmic datasets * update difficulty metadata for algebra datasets * use tuples * update tests * update tests --- reasoning_gym/algebra/polynomial_equations.py | 6 +++++- reasoning_gym/algebra/simple_integration.py | 5 ++++- reasoning_gym/algorithmic/base_conversion.py | 4 ++-- reasoning_gym/algorithmic/binary_alternation.py | 5 ++++- reasoning_gym/algorithmic/binary_matrix.py | 3 ++- reasoning_gym/algorithmic/caesar_cipher.py | 5 +++-- reasoning_gym/algorithmic/count_primes.py | 3 ++- reasoning_gym/algorithmic/cryptarithm.py | 3 +-- reasoning_gym/algorithmic/game_of_life_halting.py | 7 +++++++ reasoning_gym/algorithmic/graph_color.py | 6 +++++- reasoning_gym/algorithmic/group_anagrams.py | 4 +++- reasoning_gym/algorithmic/isomorphic_strings.py | 3 ++- reasoning_gym/algorithmic/letter_counting.py | 4 +++- reasoning_gym/algorithmic/letter_jumble.py | 4 ++-- reasoning_gym/algorithmic/manipulate_matrix.py | 9 ++++++--- reasoning_gym/algorithmic/number_filtering.py | 3 ++- reasoning_gym/algorithmic/number_sorting.py | 3 ++- reasoning_gym/algorithmic/palindrome_generation.py | 3 ++- reasoning_gym/algorithmic/palindrome_partitioning.py | 7 ++++++- reasoning_gym/algorithmic/pool_matrix.py | 9 ++++++--- reasoning_gym/algorithmic/ransom_note.py | 6 ++++-- reasoning_gym/algorithmic/rotate_matrix.py | 5 +++-- reasoning_gym/algorithmic/rotten_oranges.py | 5 ++++- reasoning_gym/algorithmic/sentence_reordering.py | 7 ++++++- reasoning_gym/algorithmic/spell_backward.py | 4 +++- reasoning_gym/algorithmic/spiral_matrix.py | 7 +++++-- reasoning_gym/algorithmic/string_insertion.py | 3 ++- reasoning_gym/algorithmic/string_manipulation.py | 6 ++++-- reasoning_gym/algorithmic/string_splitting.py | 3 ++- reasoning_gym/algorithmic/string_synthesis.py | 3 ++- reasoning_gym/algorithmic/word_ladder.py | 3 +-- reasoning_gym/algorithmic/word_sequence_reversal.py | 8 +++++++- reasoning_gym/algorithmic/word_sorting.py | 10 ++++++---- reasoning_gym/arc/rearc.py | 6 ++++-- reasoning_gym/arithmetic/basic_arithmetic.py | 7 ++++++- reasoning_gym/arithmetic/chain_sum.py | 10 ++++++---- reasoning_gym/arithmetic/count_bits.py | 5 ++++- reasoning_gym/arithmetic/decimal_arithmetic.py | 6 ++++-- reasoning_gym/arithmetic/decimal_chain_sum.py | 11 +++++++---- reasoning_gym/arithmetic/dice.py | 2 +- reasoning_gym/arithmetic/fraction_simplification.py | 5 +++-- reasoning_gym/arithmetic/gcd.py | 5 +++-- reasoning_gym/arithmetic/lcm.py | 2 +- reasoning_gym/arithmetic/leg_counting.py | 8 +++++--- reasoning_gym/arithmetic/number_format.py | 3 ++- reasoning_gym/arithmetic/power_function.py | 9 ++++++++- reasoning_gym/arithmetic/prime_factorization.py | 8 +++++++- reasoning_gym/arithmetic/products.py | 10 ++++++---- reasoning_gym/cognition/color_cube_rotation.py | 4 +++- reasoning_gym/cognition/figlet_fonts.py | 4 +++- reasoning_gym/cognition/modulo_grid.py | 4 +++- reasoning_gym/cognition/needle_haystack.py | 8 +++++++- reasoning_gym/cognition/number_sequences.py | 9 ++++++++- reasoning_gym/cognition/rectangle_count.py | 9 ++++++++- reasoning_gym/cognition/rubiks_cube.py | 2 +- reasoning_gym/games/boxnet.py | 11 +++++++---- reasoning_gym/games/emoji_mystery.py | 5 ++++- reasoning_gym/games/futoshiki.py | 7 ++++++- reasoning_gym/games/mahjong.py | 4 +++- reasoning_gym/games/maze.py | 4 ++-- reasoning_gym/games/mini_sudoku.py | 2 +- reasoning_gym/games/n_queens.py | 2 +- reasoning_gym/games/rush_hour.py | 4 +++- reasoning_gym/games/sokoban.py | 12 ++++++++++-- reasoning_gym/games/sudoku.py | 2 +- reasoning_gym/games/tower_of_hanoi.py | 3 +++ reasoning_gym/games/tsumego.py | 8 +++++++- reasoning_gym/geometry/simple_geometry.py | 4 +++- reasoning_gym/graphs/course_schedule.py | 6 +++++- reasoning_gym/graphs/family_relationships.py | 2 +- reasoning_gym/graphs/largest_island.py | 7 ++++--- reasoning_gym/graphs/quantum_lock.py | 2 +- reasoning_gym/graphs/shortest_path.py | 4 ++-- reasoning_gym/logic/circuit_logic.py | 2 +- reasoning_gym/logic/propositional_logic.py | 4 ++-- reasoning_gym/logic/self_reference.py | 4 +++- tests/test_boxnet.py | 8 ++++---- tests/test_coaching.py | 10 +++++++--- tests/test_rearc.py | 8 ++++---- tests/test_tsumego.py | 2 +- 80 files changed, 304 insertions(+), 126 deletions(-) diff --git a/reasoning_gym/algebra/polynomial_equations.py b/reasoning_gym/algebra/polynomial_equations.py index 83576a0a..07ee0b6a 100644 --- a/reasoning_gym/algebra/polynomial_equations.py +++ b/reasoning_gym/algebra/polynomial_equations.py @@ -124,7 +124,11 @@ In solving equations, please follow these instructions: "variable": variable, "degree": degree, "real_solutions": real_solutions, - "difficulty": {"terms": num_terms, "degree": degree}, + "num_terms": num_terms, + "difficulty": { + "terms": (self.config.min_terms, self.config.max_terms), + "degree": (self.config.min_degree, self.config.max_degree), + }, }, } diff --git a/reasoning_gym/algebra/simple_integration.py b/reasoning_gym/algebra/simple_integration.py index 610aa7fe..16aa2d1b 100644 --- a/reasoning_gym/algebra/simple_integration.py +++ b/reasoning_gym/algebra/simple_integration.py @@ -85,7 +85,10 @@ When performing calculations, please follow these guidelines: "integrand": str(derivative), "variable": str(symbol), "expected_answer_expression": polynomial, - "difficulty": {"terms": num_terms}, + "num_terms": num_terms, + "difficulty": { + "terms": (self.config.min_terms, self.config.max_terms), + }, }, } diff --git a/reasoning_gym/algorithmic/base_conversion.py b/reasoning_gym/algorithmic/base_conversion.py index bea8bbbf..80c9bb29 100644 --- a/reasoning_gym/algorithmic/base_conversion.py +++ b/reasoning_gym/algorithmic/base_conversion.py @@ -110,8 +110,8 @@ class BaseConversionDataset(ProceduralDataset): "source_repr": source_repr, "target_repr": target_repr, "difficulty": { - "value": value, - "base": (source_base, target_base), + "base": (self.config.min_base, self.config.max_base), + "value": (self.config.min_value, self.config.max_value), }, }, } diff --git a/reasoning_gym/algorithmic/binary_alternation.py b/reasoning_gym/algorithmic/binary_alternation.py index 42d3b17f..986f97f1 100644 --- a/reasoning_gym/algorithmic/binary_alternation.py +++ b/reasoning_gym/algorithmic/binary_alternation.py @@ -108,7 +108,10 @@ class BinaryAlternationDataset(ProceduralDataset): "string": string, "solution": answer, "solvable": solvable, - "difficulty": {"n": n}, + "n": n, + "difficulty": { + "n": (self.config.min_n, self.config.max_n), + }, }, } diff --git a/reasoning_gym/algorithmic/binary_matrix.py b/reasoning_gym/algorithmic/binary_matrix.py index b90215d3..1c7db03b 100644 --- a/reasoning_gym/algorithmic/binary_matrix.py +++ b/reasoning_gym/algorithmic/binary_matrix.py @@ -130,8 +130,9 @@ class BinaryMatrixDataset(ProceduralDataset): "metadata": { "matrix": matrix, "solution": answer, + "n": n, "difficulty": { - "n": n, + "n": (self.config.min_n, self.config.max_n), "p_zero": self.config.p_zero, }, }, diff --git a/reasoning_gym/algorithmic/caesar_cipher.py b/reasoning_gym/algorithmic/caesar_cipher.py index 579a35ca..e28c1dc4 100644 --- a/reasoning_gym/algorithmic/caesar_cipher.py +++ b/reasoning_gym/algorithmic/caesar_cipher.py @@ -80,9 +80,10 @@ class CaesarCipherDataset(ProceduralDataset): "rotation": rotation, "cipher_text": cipher_text, "clear_text": sentence, + "num_words": num_words, "difficulty": { - "rotation": rotation, - "words": num_words, + "words": (self.config.min_words, self.config.max_words), + "rotation": (self.config.min_rotation, self.config.max_rotation), }, }, } diff --git a/reasoning_gym/algorithmic/count_primes.py b/reasoning_gym/algorithmic/count_primes.py index af0bf84f..424b0884 100644 --- a/reasoning_gym/algorithmic/count_primes.py +++ b/reasoning_gym/algorithmic/count_primes.py @@ -64,8 +64,9 @@ class CountPrimesDataset(ProceduralDataset): "end": end, "primes": primes, "solution": answer, + "n": (start, end), "difficulty": { - "n": (start, end), + "n": (self.config.min_n, self.config.max_n), }, }, } diff --git a/reasoning_gym/algorithmic/cryptarithm.py b/reasoning_gym/algorithmic/cryptarithm.py index 3ca9c6a0..c989960d 100644 --- a/reasoning_gym/algorithmic/cryptarithm.py +++ b/reasoning_gym/algorithmic/cryptarithm.py @@ -187,8 +187,7 @@ class CryptarithmDataset(ProceduralDataset): "digit_to_letter": digit_to_letter, "letter_to_digit": letter_to_digit, "difficulty": { - "min_words": self.config.min_words, - "max_words": self.config.max_words, + "words": (self.config.min_words, self.config.max_words), }, }, } diff --git a/reasoning_gym/algorithmic/game_of_life_halting.py b/reasoning_gym/algorithmic/game_of_life_halting.py index e5b464c4..51b2bc3a 100644 --- a/reasoning_gym/algorithmic/game_of_life_halting.py +++ b/reasoning_gym/algorithmic/game_of_life_halting.py @@ -368,6 +368,13 @@ class GameOfLifeHaltingDataset(ProceduralDataset): "placed_patterns": placed_patterns, "simulation_steps": self.config.max_simulation_steps, "should_oscillate": should_oscillate, + "difficulty": { + "grid_size_x": self.config.grid_size_x, + "grid_size_y": self.config.grid_size_y, + "difficulty": self.config.difficulty, + "num_oscillators": self.config.num_oscillators, + "max_simulation_steps": self.config.max_simulation_steps, + }, }, } diff --git a/reasoning_gym/algorithmic/graph_color.py b/reasoning_gym/algorithmic/graph_color.py index 75c99e05..749f3b45 100644 --- a/reasoning_gym/algorithmic/graph_color.py +++ b/reasoning_gym/algorithmic/graph_color.py @@ -215,7 +215,11 @@ Return your solution as a JSON map of vertices to colors. (For example: {{"0": 1 "metadata": { "possible_answer": solution, "puzzle": puzzle, - "difficulty": {"num_vertices": num_vertices, "num_colors": num_colors}, + "num_vertices": num_vertices, + "difficulty": { + "num_vertices": (self.config.min_num_vertices, self.config.max_num_vertices), + "num_colors": num_colors, + }, }, } diff --git a/reasoning_gym/algorithmic/group_anagrams.py b/reasoning_gym/algorithmic/group_anagrams.py index ec212843..6b485e9e 100644 --- a/reasoning_gym/algorithmic/group_anagrams.py +++ b/reasoning_gym/algorithmic/group_anagrams.py @@ -117,8 +117,10 @@ class GroupAnagramsDataset(ProceduralDataset): "metadata": { "words": words, "solution": answer, + "anagram_groups": anagram_groups, "difficulty": { - "anagram_groups": anagram_groups, + "anagram_groups": (self.config.min_anagram_groups, self.config.max_anagram_groups), + "words_per_group": (self.config.min_words_per_group, self.config.max_words_per_group), }, }, } diff --git a/reasoning_gym/algorithmic/isomorphic_strings.py b/reasoning_gym/algorithmic/isomorphic_strings.py index 1b539383..f2e2fb49 100644 --- a/reasoning_gym/algorithmic/isomorphic_strings.py +++ b/reasoning_gym/algorithmic/isomorphic_strings.py @@ -110,8 +110,9 @@ class IsomorphicStringsDataset(ProceduralDataset): "words": [s, t], "solution": answer, "solvable": solvable, + "string_length": string_length, "difficulty": { - "string_length": string_length, + "string_length": (self.config.min_string_length, self.config.max_string_length), }, }, } diff --git a/reasoning_gym/algorithmic/letter_counting.py b/reasoning_gym/algorithmic/letter_counting.py index e2262ffd..197596e5 100644 --- a/reasoning_gym/algorithmic/letter_counting.py +++ b/reasoning_gym/algorithmic/letter_counting.py @@ -67,7 +67,9 @@ class LetterCountingDataset(ProceduralDataset): "span_length": span_length, "target_letter": target_letter, "span": span, - "difficulty": {"words": span_length}, + "difficulty": { + "words": (self.config.min_words, self.config.max_words), + }, }, } diff --git a/reasoning_gym/algorithmic/letter_jumble.py b/reasoning_gym/algorithmic/letter_jumble.py index 0e896b93..7f56dfa8 100644 --- a/reasoning_gym/algorithmic/letter_jumble.py +++ b/reasoning_gym/algorithmic/letter_jumble.py @@ -110,8 +110,8 @@ class LetterJumbleDataset(ProceduralDataset): "original_words": selected_words, "difficulty": { "word_len": (self.config.min_word_len, self.config.max_word_len), - "words": num_words, - "corruption_level": corruption_level, + "words": (self.config.min_words, self.config.max_words), + "corruption_level": (self.config.min_corruption_level, self.config.max_corruption_level), }, }, } diff --git a/reasoning_gym/algorithmic/manipulate_matrix.py b/reasoning_gym/algorithmic/manipulate_matrix.py index e6193388..49fe4615 100644 --- a/reasoning_gym/algorithmic/manipulate_matrix.py +++ b/reasoning_gym/algorithmic/manipulate_matrix.py @@ -309,10 +309,13 @@ class ManipulateMatrixDataset(ProceduralDataset): "matrix": matrix, "solution": answer, "operations": operations, + "rows": rows, + "cols": cols, + "num_transforms": num_transforms, "difficulty": { - "rows": rows, - "cols": cols, - "num_transforms": num_transforms, + "rows": (self.config.min_rows, self.config.max_rows), + "cols": (self.config.min_cols, self.config.max_cols), + "num_transforms": (self.config.min_transforms, self.config.max_transforms), }, }, } diff --git a/reasoning_gym/algorithmic/number_filtering.py b/reasoning_gym/algorithmic/number_filtering.py index 6eeb335f..0561cd7d 100644 --- a/reasoning_gym/algorithmic/number_filtering.py +++ b/reasoning_gym/algorithmic/number_filtering.py @@ -95,8 +95,9 @@ class NumberFilteringDataset(ProceduralDataset): "filter_value": filter_str, "operation": f"{keep_remove}_{larger_smaller}", "result": result_strs, + "numbers": len(numbers), "difficulty": { - "numbers": len(numbers), + "numbers": (self.config.min_numbers, self.config.max_numbers), "decimals": (self.config.min_decimals, self.config.max_decimals), "value": (self.config.min_value, self.config.max_value), }, diff --git a/reasoning_gym/algorithmic/number_sorting.py b/reasoning_gym/algorithmic/number_sorting.py index 27c17743..35664b8e 100644 --- a/reasoning_gym/algorithmic/number_sorting.py +++ b/reasoning_gym/algorithmic/number_sorting.py @@ -93,8 +93,9 @@ Please follow the instruction below: "original_numbers": number_strs, "direction": direction, "sorted_numbers": answer, + "numbers": count, "difficulty": { - "numbers": count, + "numbers": (self.config.min_numbers, self.config.max_numbers), "decimals": (self.config.min_decimals, self.config.max_decimals), "value": (self.config.min_value, self.config.max_value), }, diff --git a/reasoning_gym/algorithmic/palindrome_generation.py b/reasoning_gym/algorithmic/palindrome_generation.py index 157d780d..92236b9a 100644 --- a/reasoning_gym/algorithmic/palindrome_generation.py +++ b/reasoning_gym/algorithmic/palindrome_generation.py @@ -69,8 +69,9 @@ class PalindromeDataset(ProceduralDataset): "metadata": { "letters": scrambled_letters, "generated_palindrome": palindrome, + "length": length, "difficulty": { - "length": length, + "length": (self.config.min_length, self.config.max_length), }, }, } diff --git a/reasoning_gym/algorithmic/palindrome_partitioning.py b/reasoning_gym/algorithmic/palindrome_partitioning.py index 103a6fbb..a7789a12 100644 --- a/reasoning_gym/algorithmic/palindrome_partitioning.py +++ b/reasoning_gym/algorithmic/palindrome_partitioning.py @@ -140,8 +140,13 @@ class PalindromePartitioningDataset(ProceduralDataset): "metadata": { "string": string, "solution": answer, + "string_len": string_len, "difficulty": { - "string_len": string_len, + "string_len": (self.config.min_string_len, self.config.max_string_len), + "substring_palindrome_len": ( + self.config.min_substring_palindrome_len, + self.config.max_substring_palindrome_len, + ), }, }, } diff --git a/reasoning_gym/algorithmic/pool_matrix.py b/reasoning_gym/algorithmic/pool_matrix.py index 24af56f9..3a703d83 100644 --- a/reasoning_gym/algorithmic/pool_matrix.py +++ b/reasoning_gym/algorithmic/pool_matrix.py @@ -117,10 +117,13 @@ class PoolMatrixDataset(ProceduralDataset): "pool_type": pool_type, "pool_size": pool_size, "solution": answer.tolist(), + "rows": rows, + "cols": cols, + "pool_size": pool_size, "difficulty": { - "rows": rows, - "cols": cols, - "pool_size": pool_size, + "rows": (self.config.min_rows, self.config.max_rows), + "cols": (self.config.min_cols, self.config.max_cols), + "pool_size": (self.config.min_pool_size, self.config.max_pool_size), }, }, } diff --git a/reasoning_gym/algorithmic/ransom_note.py b/reasoning_gym/algorithmic/ransom_note.py index 23851e11..5e32c0e9 100644 --- a/reasoning_gym/algorithmic/ransom_note.py +++ b/reasoning_gym/algorithmic/ransom_note.py @@ -103,9 +103,11 @@ class RansomNoteDataset(ProceduralDataset): "magazine": magazine, "solution": answer, "solvable": solvable, + "note_length": note_length, + "magazine_length": magazine_length, "difficulty": { - "note_length": note_length, - "magazine_length": magazine_length, + "note_length": (self.config.min_note_length, self.config.max_note_length), + "magazine_length": (self.config.min_magazine_length, self.config.max_magazine_length), }, }, } diff --git a/reasoning_gym/algorithmic/rotate_matrix.py b/reasoning_gym/algorithmic/rotate_matrix.py index ee91f8d1..0716e656 100644 --- a/reasoning_gym/algorithmic/rotate_matrix.py +++ b/reasoning_gym/algorithmic/rotate_matrix.py @@ -86,9 +86,10 @@ class RotateMatrixDataset(ProceduralDataset): "matrix": matrix, "num_rotations": num_rotations, "solution": answer, + "n": n, "difficulty": { - "n": n, - "num_rotations": num_rotations, + "n": (self.config.min_n, self.config.max_n), + "num_rotations": (self.config.min_rotations, self.config.max_rotations), }, }, } diff --git a/reasoning_gym/algorithmic/rotten_oranges.py b/reasoning_gym/algorithmic/rotten_oranges.py index f1de4318..f2dabadf 100644 --- a/reasoning_gym/algorithmic/rotten_oranges.py +++ b/reasoning_gym/algorithmic/rotten_oranges.py @@ -122,7 +122,10 @@ class RottenOrangesDataset(ProceduralDataset): "metadata": { "matrix": matrix, "solution": answer, - "difficulty": {"n": n}, + "n": n, + "difficulty": { + "n": (self.config.min_n, self.config.max_n), + }, }, } diff --git a/reasoning_gym/algorithmic/sentence_reordering.py b/reasoning_gym/algorithmic/sentence_reordering.py index c5eab985..29402b44 100644 --- a/reasoning_gym/algorithmic/sentence_reordering.py +++ b/reasoning_gym/algorithmic/sentence_reordering.py @@ -90,7 +90,12 @@ class SentenceReorderingDataset(ProceduralDataset): return { "question": f"Restore the correct order of words in the following sentence: {question}", "answer": solved_sentence, - "metadata": {"word_count": word_count, "difficulty": {"words_in_sentence": word_count}}, + "metadata": { + "word_count": word_count, + "difficulty": { + "words_in_sentence": (self.config.min_words_in_sentence, self.config.max_words_in_sentence), + }, + }, } def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float: diff --git a/reasoning_gym/algorithmic/spell_backward.py b/reasoning_gym/algorithmic/spell_backward.py index 24d980db..e4008038 100644 --- a/reasoning_gym/algorithmic/spell_backward.py +++ b/reasoning_gym/algorithmic/spell_backward.py @@ -54,7 +54,9 @@ class SpellBackwardDataset(ProceduralDataset): "metadata": { "word": word, "word_len": len(word), - "difficulty": {"word_len": (self.config.min_word_len, self.config.max_word_len)}, + "difficulty": { + "word_len": (self.config.min_word_len, self.config.max_word_len), + }, }, } diff --git a/reasoning_gym/algorithmic/spiral_matrix.py b/reasoning_gym/algorithmic/spiral_matrix.py index b5001e64..23fb6e37 100644 --- a/reasoning_gym/algorithmic/spiral_matrix.py +++ b/reasoning_gym/algorithmic/spiral_matrix.py @@ -101,7 +101,7 @@ class SpiralMatrixDataset(ProceduralDataset): """Generate a single Spiral Matrix question""" rng = Random(self.seed + idx) - n = rng.randint(2, self.config.max_n) + n = rng.randint(self.config.min_n, self.config.max_n) matrix = self._get_matrix(rng, n) matrix_str = self._matrix_to_str(matrix) answer = self._get_spiral(matrix) @@ -113,7 +113,10 @@ class SpiralMatrixDataset(ProceduralDataset): "metadata": { "matrix": matrix, "solution": answer, - "difficulty": {"n": n}, + "n": n, + "difficulty": { + "n": (self.config.min_n, self.config.max_n), + }, }, } diff --git a/reasoning_gym/algorithmic/string_insertion.py b/reasoning_gym/algorithmic/string_insertion.py index 02ae4ea1..3c707bf2 100644 --- a/reasoning_gym/algorithmic/string_insertion.py +++ b/reasoning_gym/algorithmic/string_insertion.py @@ -104,8 +104,9 @@ class StringInsertionDataset(ProceduralDataset): "metadata": { "string": string, "solution": answer, + "string_length": string_length, "difficulty": { - "string_length": string_length, + "string_length": (self.config.min_string_length, self.config.max_string_length), }, }, } diff --git a/reasoning_gym/algorithmic/string_manipulation.py b/reasoning_gym/algorithmic/string_manipulation.py index 73deb1de..fb6e3e33 100644 --- a/reasoning_gym/algorithmic/string_manipulation.py +++ b/reasoning_gym/algorithmic/string_manipulation.py @@ -183,9 +183,11 @@ class StringManipulationDataset(ProceduralDataset): "solution": answer, "states": states, "selected_rules": [rule for rule, _ in selected_rules], + "string_length": string_length, + "num_rules": num_rules, "difficulty": { - "string_length": string_length, - "num_rules": num_rules, + "string_length": (self.config.min_string_length, self.config.max_string_length), + "num_rules": (self.config.min_num_rules, self.config.max_num_rules), }, }, } diff --git a/reasoning_gym/algorithmic/string_splitting.py b/reasoning_gym/algorithmic/string_splitting.py index 82096175..4ac1ed43 100644 --- a/reasoning_gym/algorithmic/string_splitting.py +++ b/reasoning_gym/algorithmic/string_splitting.py @@ -127,8 +127,9 @@ class StringSplittingDataset(ProceduralDataset): "metadata": { "states": states, "solution": answer, + "initial_machines": (A_machine, B_machine, C_machine), "difficulty": { - "initial_machines": (A_machine, B_machine, C_machine), + "initial_machines": (self.config.min_initial_machines, self.config.max_initial_machines), }, }, } diff --git a/reasoning_gym/algorithmic/string_synthesis.py b/reasoning_gym/algorithmic/string_synthesis.py index 7742c6d0..2edc805b 100644 --- a/reasoning_gym/algorithmic/string_synthesis.py +++ b/reasoning_gym/algorithmic/string_synthesis.py @@ -132,8 +132,9 @@ class StringSynthesisDataset(ProceduralDataset): "metadata": { "states": states, "solution": answer, + "initial_blocks": (A_square, B_square, C_square), "difficulty": { - "initial_blocks": (A_square, B_square, C_square), + "initial_blocks": (self.config.min_initial_blocks, self.config.max_initial_blocks), }, }, } diff --git a/reasoning_gym/algorithmic/word_ladder.py b/reasoning_gym/algorithmic/word_ladder.py index 9d8cb89d..9d3d8429 100644 --- a/reasoning_gym/algorithmic/word_ladder.py +++ b/reasoning_gym/algorithmic/word_ladder.py @@ -224,8 +224,7 @@ class WordLadderDataset(ProceduralDataset): "word_length": length, "chain_length": len(path), "difficulty": { - "word_length": length, - "chain_length": len(path), + "word_length": (self.config.min_word_length, self.config.max_word_length), }, }, } diff --git a/reasoning_gym/algorithmic/word_sequence_reversal.py b/reasoning_gym/algorithmic/word_sequence_reversal.py index f0b2f68c..b7b41b08 100644 --- a/reasoning_gym/algorithmic/word_sequence_reversal.py +++ b/reasoning_gym/algorithmic/word_sequence_reversal.py @@ -62,7 +62,13 @@ class WordSequenceReversalDataset(ProceduralDataset): return { "question": f"{QUESTION_TEMPLATE.format(words=words_str)}", "answer": answer, - "metadata": {"num_words": num_words, "words": words, "difficulty": {"words": num_words}}, + "metadata": { + "num_words": num_words, + "words": words, + "difficulty": { + "words": (self.config.min_words, self.config.max_words), + }, + }, } diff --git a/reasoning_gym/algorithmic/word_sorting.py b/reasoning_gym/algorithmic/word_sorting.py index 51b94946..fd514cc7 100644 --- a/reasoning_gym/algorithmic/word_sorting.py +++ b/reasoning_gym/algorithmic/word_sorting.py @@ -106,14 +106,16 @@ class WordSortingDataset(ProceduralDataset): "question": QUESTION_TEMPLATE.format(direction=direction, words=", ".join(transformed_words)), "answer": ", ".join(answer), "metadata": { - "difficulty": { - "num_words": len(original_words), - "word_length": max(len(word) for word in original_words), - }, "original_words": original_words, "sorted_words": answer, "transformed_words": transformed_words, "direction": direction, + "num_words": len(original_words), + "word_length": max(len(word) for word in original_words), + "difficulty": { + "num_words": (self.config.min_words, self.config.max_words), + "word_length": (self.config.min_word_length, self.config.max_word_length), + }, }, } diff --git a/reasoning_gym/arc/rearc.py b/reasoning_gym/arc/rearc.py index 4b74c8fa..83adedbf 100644 --- a/reasoning_gym/arc/rearc.py +++ b/reasoning_gym/arc/rearc.py @@ -117,9 +117,11 @@ class ReArcDataset(ProceduralDataset): "input": task["input"], "output": task["output"], "task_id": task_id, + "rng": rng_difficulty, + "pso": pso_difficulty, "difficulty": { - "rng": rng_difficulty, - "pso": pso_difficulty, + "rng_difficulty": self.config.rng_difficulty_weights, + "pso_difficulty": self.config.pso_difficulty_weights, }, }, } diff --git a/reasoning_gym/arithmetic/basic_arithmetic.py b/reasoning_gym/arithmetic/basic_arithmetic.py index dcffd2fc..42dbcf02 100644 --- a/reasoning_gym/arithmetic/basic_arithmetic.py +++ b/reasoning_gym/arithmetic/basic_arithmetic.py @@ -96,7 +96,12 @@ class BasicArithmeticDataset(ProceduralDataset): "answer": str(result), "metadata": { "expression": expression, - "difficulty": {"num_terms": num_terms, "num_digits": num_digits}, + "num_terms": num_terms, + "num_digits": num_digits, + "difficulty": { + "num_terms": (self.config.min_terms, self.config.max_terms), + "num_digits": (self.config.min_digits, self.config.max_digits), + }, }, } diff --git a/reasoning_gym/arithmetic/chain_sum.py b/reasoning_gym/arithmetic/chain_sum.py index 26e20e68..8b0a3a19 100644 --- a/reasoning_gym/arithmetic/chain_sum.py +++ b/reasoning_gym/arithmetic/chain_sum.py @@ -64,11 +64,13 @@ class ChainSumDataset(ProceduralDataset): "question": f"State the final answer to the following arithmetic problem: {expression} =", "answer": str(result), "metadata": { - "difficulty": { - "num_terms": num_terms, - "num_digits": num_digits, - }, + "num_terms": num_terms, + "num_digits": num_digits, "expression": expression, + "difficulty": { + "num_terms": (self.config.min_terms, self.config.max_terms), + "num_digits": (self.config.min_digits, self.config.max_digits), + }, }, } diff --git a/reasoning_gym/arithmetic/count_bits.py b/reasoning_gym/arithmetic/count_bits.py index c2a6d61f..a3557f3c 100644 --- a/reasoning_gym/arithmetic/count_bits.py +++ b/reasoning_gym/arithmetic/count_bits.py @@ -46,7 +46,10 @@ class CountBitsDataset(ProceduralDataset): "number": number, "solution": answer, "binary": binary, - "difficulty": {"n": number}, + "n": number, + "difficulty": { + "n": (self.config.min_n, self.config.max_n), + }, }, } diff --git a/reasoning_gym/arithmetic/decimal_arithmetic.py b/reasoning_gym/arithmetic/decimal_arithmetic.py index 806a442b..34ce306c 100644 --- a/reasoning_gym/arithmetic/decimal_arithmetic.py +++ b/reasoning_gym/arithmetic/decimal_arithmetic.py @@ -189,9 +189,11 @@ class DecimalArithmeticDataset(ProceduralDataset): "question": problem_str, "answer": str(answer), "metadata": { + "decimal_places": decimal_places, + "num_terms": terms, "difficulty": { - "decimal_places": decimal_places, - "num_terms": terms, + "decimal_places": (self.config.min_num_decimal_places, self.config.max_num_decimal_places), + "num_terms": (self.config.min_terms, self.config.max_terms), }, }, } diff --git a/reasoning_gym/arithmetic/decimal_chain_sum.py b/reasoning_gym/arithmetic/decimal_chain_sum.py index 92be2f2b..057203ce 100644 --- a/reasoning_gym/arithmetic/decimal_chain_sum.py +++ b/reasoning_gym/arithmetic/decimal_chain_sum.py @@ -66,11 +66,14 @@ class DecimalChainSumDataset(ProceduralDataset): "question": f"State the final answer to the following arithmetic problem: {expression} =", "answer": str(result), "metadata": { - "difficulty": { - "num_terms": num_terms, - "num_digits": num_digits, - }, + "num_terms": num_terms, + "num_digits": num_digits, "expression": expression, + "difficulty": { + "num_terms": (self.config.min_terms, self.config.max_terms), + "num_digits": (self.config.min_digits, self.config.max_digits), + "decimal_places": (self.config.min_decimal_places, self.config.max_decimal_places), + }, }, } diff --git a/reasoning_gym/arithmetic/dice.py b/reasoning_gym/arithmetic/dice.py index c3a58fac..8bf19981 100644 --- a/reasoning_gym/arithmetic/dice.py +++ b/reasoning_gym/arithmetic/dice.py @@ -124,11 +124,11 @@ class DiceDataset(ProceduralDataset): "question": puzzle_str, "answer": answer_str, "metadata": { + "puzzle": puzzle, "difficulty": { "num_dice": self.config.num_dice, "max_dice_size": self.config.max_dice_size, }, - "puzzle": puzzle, }, } diff --git a/reasoning_gym/arithmetic/fraction_simplification.py b/reasoning_gym/arithmetic/fraction_simplification.py index 2b8c8453..2176cfef 100644 --- a/reasoning_gym/arithmetic/fraction_simplification.py +++ b/reasoning_gym/arithmetic/fraction_simplification.py @@ -120,9 +120,10 @@ class FractionSimplificationDataset(ProceduralDataset): "simplified_denominator": simple_den, "reduction_factor": num // simple_num, # Will be same as den // simple_den "style": style, + "factor": factor, "difficulty": { - "factor": factor, - "value": (simple_num, simple_den), + "value": (self.config.min_value, self.config.max_value), + "factor": (self.config.min_factor, self.config.max_factor), }, }, } diff --git a/reasoning_gym/arithmetic/gcd.py b/reasoning_gym/arithmetic/gcd.py index c8efba3a..33b1e061 100644 --- a/reasoning_gym/arithmetic/gcd.py +++ b/reasoning_gym/arithmetic/gcd.py @@ -64,9 +64,10 @@ class GCDDataset(ProceduralDataset): "metadata": { "numbers": numbers, "result": result, + "num_terms": num_terms, "difficulty": { - "num_terms": num_terms, - "max_value": self.config.max_value, + "num_terms": (self.config.min_numbers, self.config.max_numbers), + "max_value": (self.config.min_value, self.config.max_value), }, }, } diff --git a/reasoning_gym/arithmetic/lcm.py b/reasoning_gym/arithmetic/lcm.py index a8f6c5ea..7f121a31 100644 --- a/reasoning_gym/arithmetic/lcm.py +++ b/reasoning_gym/arithmetic/lcm.py @@ -67,7 +67,7 @@ class LCMDataset(ProceduralDataset): "numbers": numbers, "result": result, "difficulty": { - "numbers": len(numbers), + "numbers": (self.config.min_numbers, self.config.max_numbers), "value": (self.config.min_value, self.config.max_value), }, }, diff --git a/reasoning_gym/arithmetic/leg_counting.py b/reasoning_gym/arithmetic/leg_counting.py index b2103858..91b65251 100644 --- a/reasoning_gym/arithmetic/leg_counting.py +++ b/reasoning_gym/arithmetic/leg_counting.py @@ -118,11 +118,13 @@ class LegCountingDataset(ProceduralDataset): "question": QUESTION_TEMPLATE.format(animals=", ".join(animal_list)), "answer": str(total_legs), "metadata": { - "difficulty": { - "num_animals": len(animals), - }, "animals": animals, + "num_animals": len(animals), "total_legs": total_legs, + "difficulty": { + "num_animals": (self.config.min_animals, self.config.max_animals), + "num_instances": (self.config.min_instances, self.config.max_instances), + }, }, } diff --git a/reasoning_gym/arithmetic/number_format.py b/reasoning_gym/arithmetic/number_format.py index 44515254..955be7a8 100644 --- a/reasoning_gym/arithmetic/number_format.py +++ b/reasoning_gym/arithmetic/number_format.py @@ -98,8 +98,9 @@ class NumberFormatDataset(ProceduralDataset): "solution": answer, "formatted_candidates": formatted_candidates, "size": size, + "num_candidates": num_candidates, "difficulty": { - "num_candidates": num_candidates, + "num_candidates": (self.config.min_num_candidates, self.config.max_num_candidates), "n": (self.config.min_n, self.config.max_n), "min_delta": self.config.max_delta, }, diff --git a/reasoning_gym/arithmetic/power_function.py b/reasoning_gym/arithmetic/power_function.py index ff32404c..931a7cbf 100644 --- a/reasoning_gym/arithmetic/power_function.py +++ b/reasoning_gym/arithmetic/power_function.py @@ -73,7 +73,14 @@ class PowerFunctionDataset(ProceduralDataset): return { "question": QUESTION_TEMPLATE.format(base=base, exponent=exponent), "answer": str(answer), - "metadata": {"base": base, "exponent": exponent, "solution": answer, "difficulty": {"exponent": exponent}}, + "metadata": { + "base": base, + "exponent": exponent, + "solution": answer, + "difficulty": { + "exponent": (self.config.min_exponent, self.config.max_exponent), + }, + }, } diff --git a/reasoning_gym/arithmetic/prime_factorization.py b/reasoning_gym/arithmetic/prime_factorization.py index 46d05286..b14d6b1b 100644 --- a/reasoning_gym/arithmetic/prime_factorization.py +++ b/reasoning_gym/arithmetic/prime_factorization.py @@ -83,7 +83,13 @@ class PrimeFactorizationDataset(ProceduralDataset): f"(Example: for 12 the answer would be: 2 × 2 × 3)" ), "answer": answer, - "metadata": {"number": number, "factors": factors, "difficulty": {"value": number}}, + "metadata": { + "number": number, + "factors": factors, + "difficulty": { + "value": (self.config.min_value, self.config.max_value), + }, + }, } diff --git a/reasoning_gym/arithmetic/products.py b/reasoning_gym/arithmetic/products.py index 4216a96d..b9d38961 100644 --- a/reasoning_gym/arithmetic/products.py +++ b/reasoning_gym/arithmetic/products.py @@ -66,11 +66,13 @@ class ProductsDataset(ProceduralDataset): "question": f"Solve the following multiplication: {expression}. Give only the result as your final answer.", "answer": str(result), "metadata": { - "difficulty": { - "num_terms": num_terms, - "num_digits": num_digits, - }, "expression": expression, + "num_terms": num_terms, + "num_digits": num_digits, + "difficulty": { + "num_terms": (self.config.min_terms, self.config.max_terms), + "num_digits": (self.config.min_digits, self.config.max_digits), + }, }, } diff --git a/reasoning_gym/cognition/color_cube_rotation.py b/reasoning_gym/cognition/color_cube_rotation.py index 1b6e0b68..1b1e486c 100644 --- a/reasoning_gym/cognition/color_cube_rotation.py +++ b/reasoning_gym/cognition/color_cube_rotation.py @@ -141,7 +141,9 @@ class ColorCubeRotationDataset(ProceduralDataset): "rotations": [r.value for r in rotations], "target_side": target_side.value, "num_rotations": num_rotations, - "difficulty": {"rotations": num_rotations}, + "difficulty": { + "rotations": (self.config.min_rotations, self.config.max_rotations), + }, }, } diff --git a/reasoning_gym/cognition/figlet_fonts.py b/reasoning_gym/cognition/figlet_fonts.py index f9abc1f7..715fc79e 100644 --- a/reasoning_gym/cognition/figlet_fonts.py +++ b/reasoning_gym/cognition/figlet_fonts.py @@ -188,7 +188,9 @@ class FigletFontDataset(ProceduralDataset): "metadata": { "font": chosen_font, "space_letters": self.config.space_letters, - "difficulty": {"word_len": len(word)}, + "difficulty": { + "word_len": (self.config.min_word_len, self.config.max_word_len), + }, }, } diff --git a/reasoning_gym/cognition/modulo_grid.py b/reasoning_gym/cognition/modulo_grid.py index 45ab2a56..88debfab 100644 --- a/reasoning_gym/cognition/modulo_grid.py +++ b/reasoning_gym/cognition/modulo_grid.py @@ -140,9 +140,11 @@ class ModuloGridDataset(ProceduralDataset): "target": target, "operation": operation, "difficulty": { - "holes": self.config.max_holes, "size_x": self.config.size_x, "size_y": self.config.size_y, + "holes": self.config.max_holes, + "divisor": self.config.max_divisor, + "target": self.config.max_target, }, }, } diff --git a/reasoning_gym/cognition/needle_haystack.py b/reasoning_gym/cognition/needle_haystack.py index cc98765a..2eb8ee55 100644 --- a/reasoning_gym/cognition/needle_haystack.py +++ b/reasoning_gym/cognition/needle_haystack.py @@ -103,7 +103,13 @@ class NeedleHaystackDataset(ProceduralDataset): return { "question": full_text, "answer": stack["needle"][0], - "metadata": {"question": question, "difficulty": {"num_statements": num_statements}}, + "metadata": { + "question": question, + "num_statements": num_statements, + "difficulty": { + "num_statements": (self.config.min_num_statements, self.config.max_num_statements), + }, + }, } def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float: diff --git a/reasoning_gym/cognition/number_sequences.py b/reasoning_gym/cognition/number_sequences.py index 10f7253c..6b9b4619 100644 --- a/reasoning_gym/cognition/number_sequences.py +++ b/reasoning_gym/cognition/number_sequences.py @@ -195,7 +195,14 @@ class NumberSequenceDataset(ProceduralDataset): return { "question": ", ".join(map(str, visible_terms)) + ", ?", "answer": str(sequence[-1]), - "metadata": {"rule": rule.to_string(), "complexity": complexity, "sequence": sequence}, + "metadata": { + "rule": rule.to_string(), + "complexity": complexity, + "sequence": sequence, + "difficulty": { + "max_complexity": self.config.max_complexity, + }, + }, } diff --git a/reasoning_gym/cognition/rectangle_count.py b/reasoning_gym/cognition/rectangle_count.py index 61a9b76b..2fb02dca 100644 --- a/reasoning_gym/cognition/rectangle_count.py +++ b/reasoning_gym/cognition/rectangle_count.py @@ -117,7 +117,14 @@ class RectangleCountDataset(ProceduralDataset): return { "question": QUESTION_TEMPLATE.format(puzzle=puzzle), "answer": str(answer), - "metadata": {"puzzle": puzzle, "solution": answer, "difficulty": {"max_rectangles": target}}, + "metadata": { + "puzzle": puzzle, + "solution": answer, + "num_rectangles": target, + "difficulty": { + "max_rectangles": self.config.max_rectangles, + }, + }, } def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float: diff --git a/reasoning_gym/cognition/rubiks_cube.py b/reasoning_gym/cognition/rubiks_cube.py index fb728ea6..25b236f6 100644 --- a/reasoning_gym/cognition/rubiks_cube.py +++ b/reasoning_gym/cognition/rubiks_cube.py @@ -110,8 +110,8 @@ class RubiksCubeDataset(ProceduralDataset): "scramble_moves": " ".join([str(move) for move in scramble_moves]), "example_correct_answer": actions_string, "difficulty": { - "scramble_steps": num_steps, "cube_size": self.config.cube_size, + "scramble_steps": (self.config.min_scramble_steps, self.config.max_scramble_steps), }, }, } diff --git a/reasoning_gym/games/boxnet.py b/reasoning_gym/games/boxnet.py index 21d77c39..312cddca 100644 --- a/reasoning_gym/games/boxnet.py +++ b/reasoning_gym/games/boxnet.py @@ -126,11 +126,14 @@ class BoxnetDataset(ProceduralDataset): "question": question, "answer": None, "metadata": { - "difficulty": { - "row_num": row_num, - "column_num": column_num, - }, + "row_num": row_num, + "column_num": column_num, "initial_state": pg_dict, + "difficulty": { + "row_num": (self.config.min_row_num, self.config.max_row_num), + "column_num": (self.config.min_column_num, self.config.max_column_num), + "box_num": (self.config.min_box_num, self.config.max_box_num), + }, }, } diff --git a/reasoning_gym/games/emoji_mystery.py b/reasoning_gym/games/emoji_mystery.py index f4a12a0e..d41b7156 100644 --- a/reasoning_gym/games/emoji_mystery.py +++ b/reasoning_gym/games/emoji_mystery.py @@ -194,7 +194,10 @@ class EmojiMysteryDataset(ProceduralDataset): "answer": secret_sentence, "metadata": { "emoji": secret_emoji, - "difficulty": {"num_words_in_sentence": len(re.findall(r"\b\w+\b", secret_sentence))}, + "num_words_in_sentence": len(re.findall(r"\b\w+\b", secret_sentence)), + "difficulty": { + "num_words_in_sentence": (self.config.min_words_in_sentence, self.config.max_words_in_sentence), + }, }, } diff --git a/reasoning_gym/games/futoshiki.py b/reasoning_gym/games/futoshiki.py index 4eb18489..9b270213 100644 --- a/reasoning_gym/games/futoshiki.py +++ b/reasoning_gym/games/futoshiki.py @@ -84,7 +84,12 @@ class FutoshikiDataset(ProceduralDataset): "puzzle": puzzle, "constraints": constraints, "solution": solution, - "difficulty": {"board_size": board_size, "difficulty": difficulty}, + "board_size": board_size, + "difficulty_rating": difficulty, + "difficulty": { + "board_size": (self.config.min_board_size, self.config.max_board_size), + "difficulty": (self.config.min_difficulty, self.config.max_difficulty), + }, }, } diff --git a/reasoning_gym/games/mahjong.py b/reasoning_gym/games/mahjong.py index 7021df09..1f380167 100644 --- a/reasoning_gym/games/mahjong.py +++ b/reasoning_gym/games/mahjong.py @@ -122,7 +122,9 @@ class MahjongPuzzleDataset(ProceduralDataset): "metadata": { "rounds": rounds, "solution": answer, - "difficulty": {"num_rounds": num_rounds}, + "difficulty": { + "num_rounds": (self.config.min_num_rounds, self.config.max_num_rounds), + }, }, } diff --git a/reasoning_gym/games/maze.py b/reasoning_gym/games/maze.py index e2f4f50e..a6305cc3 100644 --- a/reasoning_gym/games/maze.py +++ b/reasoning_gym/games/maze.py @@ -112,8 +112,8 @@ class MazeDataset(ProceduralDataset): "wall": self.wall_char, "path": self.path_char, "difficulty": { - "dist": dist, - "grid_size": size, + "dist": (self.config.min_dist, self.config.max_dist), + "grid_size": (self.config.min_grid_size, self.config.max_grid_size), }, }, } diff --git a/reasoning_gym/games/mini_sudoku.py b/reasoning_gym/games/mini_sudoku.py index ee9ea1a4..780795b7 100644 --- a/reasoning_gym/games/mini_sudoku.py +++ b/reasoning_gym/games/mini_sudoku.py @@ -197,7 +197,7 @@ class MiniSudokuDataset(ProceduralDataset): "solution": solved_board, "num_empty": num_empty, "difficulty": { - "empty": num_empty, + "empty": (self.config.min_empty, self.config.max_empty), }, }, } diff --git a/reasoning_gym/games/n_queens.py b/reasoning_gym/games/n_queens.py index d360e5cb..cef4d1a1 100644 --- a/reasoning_gym/games/n_queens.py +++ b/reasoning_gym/games/n_queens.py @@ -137,7 +137,7 @@ class NQueensDataset(ProceduralDataset): "valid_answers": valid_solutions_str, "difficulty": { "n": self.config.n, - "num_removed": num_removed, + "num_removed": (self.config.min_remove, self.config.max_remove), }, }, } diff --git a/reasoning_gym/games/rush_hour.py b/reasoning_gym/games/rush_hour.py index 1ade0d28..ba3e49ff 100644 --- a/reasoning_gym/games/rush_hour.py +++ b/reasoning_gym/games/rush_hour.py @@ -161,7 +161,9 @@ class RushHourDataset(ProceduralDataset): "metadata": { "board_config": board_config, "min_moves": min_moves, - "difficulty": {"min_moves": min_moves}, + "difficulty": { + "min_moves": (self.config.min_moves, self.config.max_moves), + }, }, } diff --git a/reasoning_gym/games/sokoban.py b/reasoning_gym/games/sokoban.py index 4cdeebdf..857139f8 100644 --- a/reasoning_gym/games/sokoban.py +++ b/reasoning_gym/games/sokoban.py @@ -65,7 +65,7 @@ class SokobanDataset(ProceduralDataset): # Make the Sokoban! rng = Random(self.seed + idx) - gamestr, solution, difficulty = self._generate( + gamestr, solution, puzzle_data = self._generate( rng=rng, min_w=self.config.min_w, min_h=self.config.min_h, @@ -93,7 +93,15 @@ Here is your puzzle: """ + gamestr, "answer": solution, - "metadata": {"gamestr": gamestr, "difficulty": difficulty}, + "metadata": { + "gamestr": gamestr, + "width": puzzle_data["width"], + "height": puzzle_data["height"], + "difficulty": { + "width": (self.config.min_w, self.config.max_w), + "height": (self.config.min_h, self.config.max_h), + }, + }, } def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float: diff --git a/reasoning_gym/games/sudoku.py b/reasoning_gym/games/sudoku.py index 3ad7f3aa..c3351a91 100644 --- a/reasoning_gym/games/sudoku.py +++ b/reasoning_gym/games/sudoku.py @@ -216,7 +216,7 @@ class SudokuDataset(ProceduralDataset): "solution": solved_board, "num_empty": num_empty, "difficulty": { - "num_empty": num_empty, + "empty": (self.config.min_empty, self.config.max_empty), }, }, } diff --git a/reasoning_gym/games/tower_of_hanoi.py b/reasoning_gym/games/tower_of_hanoi.py index e3f3444a..f89f7f7c 100644 --- a/reasoning_gym/games/tower_of_hanoi.py +++ b/reasoning_gym/games/tower_of_hanoi.py @@ -275,6 +275,9 @@ class HanoiDataset(ProceduralDataset): "target_peg": target_peg, "auxiliary_pegs": auxiliary_pegs, "solution_length": len(solution), + "difficulty": { + "num_disks": (self.min_disks, self.max_disks), + }, }, } diff --git a/reasoning_gym/games/tsumego.py b/reasoning_gym/games/tsumego.py index 0c1a9461..ba0475ee 100644 --- a/reasoning_gym/games/tsumego.py +++ b/reasoning_gym/games/tsumego.py @@ -270,7 +270,13 @@ class TsumegoDataset(ProceduralDataset): "Specify your move in coordinates (e.g. 'C4' for column C, row 4)" ), "answer": solution_str, - "metadata": {"difficulty": {"board_size": size}, "board": board}, + "metadata": { + "board": board, + "board_size": size, + "difficulty": { + "board_size": (self.config.min_board_size, self.config.max_board_size), + }, + }, } def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float: diff --git a/reasoning_gym/geometry/simple_geometry.py b/reasoning_gym/geometry/simple_geometry.py index ca938318..0f151d1f 100644 --- a/reasoning_gym/geometry/simple_geometry.py +++ b/reasoning_gym/geometry/simple_geometry.py @@ -115,7 +115,9 @@ class SimpleGeometryDataset(ProceduralDataset): "missing_angle_raw": missing_angle, "missing_angle_rounded": missing_angle_rounded, "total_interior_sum": total_sum, - "difficulty": {"sides": n_sides}, + "difficulty": { + "sides": (self.config.min_sides, self.config.max_sides), + }, }, } diff --git a/reasoning_gym/graphs/course_schedule.py b/reasoning_gym/graphs/course_schedule.py index 0db3e1b4..c18d89c0 100644 --- a/reasoning_gym/graphs/course_schedule.py +++ b/reasoning_gym/graphs/course_schedule.py @@ -136,7 +136,11 @@ class CourseScheduleDataset(ProceduralDataset): "prerequisites": prerequisites, "solution": answer, "solvable": solvable, - "difficulty": {"num_courses": num_courses}, + "difficulty": { + "num_courses": (self.config.min_num_courses, self.config.max_num_courses), + "num_prerequisites": (self.config.min_num_prerequisites, self.config.max_num_prerequisites), + "cycle_length": (self.config.min_cycle_length, self.config.max_cycle_length), + }, }, } diff --git a/reasoning_gym/graphs/family_relationships.py b/reasoning_gym/graphs/family_relationships.py index ce8d5fcd..a410a04b 100644 --- a/reasoning_gym/graphs/family_relationships.py +++ b/reasoning_gym/graphs/family_relationships.py @@ -206,7 +206,7 @@ class FamilyRelationshipsDataset(ProceduralDataset): "relationship": relationship.value, "family_size": len(family), "difficulty": { - "family_size": len(family), + "family_size": (self.config.min_family_size, self.config.max_family_size), }, }, } diff --git a/reasoning_gym/graphs/largest_island.py b/reasoning_gym/graphs/largest_island.py index acd93912..d3077fba 100644 --- a/reasoning_gym/graphs/largest_island.py +++ b/reasoning_gym/graphs/largest_island.py @@ -142,9 +142,10 @@ class LargestIslandDataset(ProceduralDataset): "grid": grid, "solution": answer, "difficulty": { - "rows": rows, - "cols": cols, - "num_islands": num_islands, + "rows": (self.config.min_rows, self.config.max_rows), + "cols": (self.config.min_cols, self.config.max_cols), + "num_islands": (self.config.min_num_islands, self.config.max_num_islands), + "island_size": (self.config.min_island_size, self.config.max_island_size), }, }, } diff --git a/reasoning_gym/graphs/quantum_lock.py b/reasoning_gym/graphs/quantum_lock.py index 76b4a30b..5256502f 100644 --- a/reasoning_gym/graphs/quantum_lock.py +++ b/reasoning_gym/graphs/quantum_lock.py @@ -56,12 +56,12 @@ Buttons: "question": self.format_puzzle(rng.choice(self._prompt_templates), puzzle=puzzle_data), "answer": " → ".join(puzzle_data["solution"]), "metadata": { - "metadata": {"difficulty": difficulty}, "solution_path": puzzle_data["solution"], "target_value": puzzle_data["target_value"], "buttons": puzzle_data["buttons"], "initial_state": puzzle_data["initial_state"], "initial_value": puzzle_data["initial_value"], + "difficulty": {"difficulty": difficulty}, }, } diff --git a/reasoning_gym/graphs/shortest_path.py b/reasoning_gym/graphs/shortest_path.py index 0021933d..93b91b6d 100644 --- a/reasoning_gym/graphs/shortest_path.py +++ b/reasoning_gym/graphs/shortest_path.py @@ -162,8 +162,8 @@ class ShortestPathDataset(ProceduralDataset): "matrix": matrix, "solution": answer, "difficulty": { - "rows": rows, - "cols": cols, + "rows": (self.config.min_rows, self.config.max_rows), + "cols": (self.config.min_cols, self.config.max_cols), }, }, } diff --git a/reasoning_gym/logic/circuit_logic.py b/reasoning_gym/logic/circuit_logic.py index c7868dbb..45729768 100644 --- a/reasoning_gym/logic/circuit_logic.py +++ b/reasoning_gym/logic/circuit_logic.py @@ -387,7 +387,7 @@ class CircuitLogicDataset(ProceduralDataset): "final_gate": final_gate_name, "inputs": inputs_list, "difficulty": { - "terms": num_terms, + "terms": (self.config.min_terms, self.config.max_terms), "inputs": (self.config.min_inputs, self.config.max_inputs), }, }, diff --git a/reasoning_gym/logic/propositional_logic.py b/reasoning_gym/logic/propositional_logic.py index f80cccdd..c67d3eb9 100644 --- a/reasoning_gym/logic/propositional_logic.py +++ b/reasoning_gym/logic/propositional_logic.py @@ -221,8 +221,8 @@ class PropositionalLogicDataset(ProceduralDataset): "complexity": self._measure_complexity(conclusion), "example_answer": str(conclusion), "difficulty": { - "vars": num_vars, - "statements": num_statements, + "vars": (self.config.min_vars, self.config.max_vars), + "statements": (self.config.min_statements, self.config.max_statements), "complexity": (self.config.min_complexity, self.config.max_complexity), }, }, diff --git a/reasoning_gym/logic/self_reference.py b/reasoning_gym/logic/self_reference.py index df89c4f2..9568e388 100644 --- a/reasoning_gym/logic/self_reference.py +++ b/reasoning_gym/logic/self_reference.py @@ -346,7 +346,9 @@ class SelfReferenceDataset(ProceduralDataset): return { "question": puzz_s, "answer": answer, - "metadata": {"difficulty": difficulty}, + "metadata": { + "difficulty": {"difficulty": difficulty}, + }, } def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float: diff --git a/tests/test_boxnet.py b/tests/test_boxnet.py index e3d8bfa7..e6313f9b 100644 --- a/tests/test_boxnet.py +++ b/tests/test_boxnet.py @@ -43,8 +43,8 @@ def test_boxnet_items(): assert "initial_state" in item["metadata"] # Verify row_num and column_num are within limits - row_num = item["metadata"]["difficulty"]["row_num"] - column_num = item["metadata"]["difficulty"]["column_num"] + row_num = item["metadata"]["row_num"] + column_num = item["metadata"]["column_num"] assert 1 <= row_num <= 2, f"row_num {row_num} outside valid range" assert 1 <= column_num <= 2, f"column_num {column_num} outside valid range" @@ -78,8 +78,8 @@ def test_boxnet_grid_sizes(): for i in range(len(dataset)): item = dataset[i] - row_num = item["metadata"]["difficulty"]["row_num"] - column_num = item["metadata"]["difficulty"]["column_num"] + row_num = item["metadata"]["row_num"] + column_num = item["metadata"]["column_num"] rows_set.add(row_num) columns_set.add(column_num) diff --git a/tests/test_coaching.py b/tests/test_coaching.py index 56e58548..01db2bc9 100644 --- a/tests/test_coaching.py +++ b/tests/test_coaching.py @@ -53,11 +53,15 @@ def test_coach_with_chain_sum(): # Each key should be a tuple of tuples containing difficulty parameters for key in aggregated.scores: assert isinstance(key, tuple) - # Each inner tuple should be (param_name, value) + # Each inner tuple should be (param_name, value) or (param_name, (min_value, max_value)) for param in key: assert isinstance(param, tuple) assert param[0] in ("num_terms", "num_digits") - assert isinstance(param[1], int) + assert ( + isinstance(param[1], int) + or (isinstance(param[1], tuple) and len(param[1]) == 2) + and all(isinstance(v, int) for v in param[1]) + ) # Test aggregation with last_n last_3 = coach.score_board.aggregate(last_n=3) @@ -171,7 +175,7 @@ def test_coach_with_composite(): item = coach[i + 5] # Use different indices if "chain_sum" in item["metadata"]["source_dataset"]: metadata = item["metadata"] - assert metadata["difficulty"]["num_terms"] >= 4 + assert metadata["num_terms"] >= 4 def test_grouped_scores_str(): diff --git a/tests/test_rearc.py b/tests/test_rearc.py index 1d035774..c30a4384 100644 --- a/tests/test_rearc.py +++ b/tests/test_rearc.py @@ -38,12 +38,12 @@ def test_rearc_items(): assert "input" in meta assert "output" in meta assert "task_id" in meta - assert "rng" in meta["difficulty"] - assert "pso" in meta["difficulty"] + assert "rng" in meta + assert "pso" in meta # Validate difficulty bounds - assert config.diff_lb <= meta["difficulty"]["rng"] <= config.diff_ub - assert config.diff_lb <= meta["difficulty"]["pso"] <= config.diff_ub + assert config.diff_lb <= meta["rng"] <= config.diff_ub + assert config.diff_lb <= meta["pso"] <= config.diff_ub def test_rearc_solution_validation(): diff --git a/tests/test_tsumego.py b/tests/test_tsumego.py index 86383701..a05ce56f 100644 --- a/tests/test_tsumego.py +++ b/tests/test_tsumego.py @@ -124,7 +124,7 @@ def test_score_answer(): # test optimal score for answers, patching each entry for x in dataset: - assert len(x["metadata"]["board"]) == x["metadata"]["difficulty"]["board_size"] + assert len(x["metadata"]["board"]) == x["metadata"]["board_size"] assert dataset.score_answer(x["answer"], entry=x) == 1.0