diff --git a/.gitignore b/.gitignore
index ce057fd8..d1e0d496 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@ wheels/
*.egg-info/
.installed.cfg
*.egg
+.python-version
# Virtual Environment
venv/
diff --git a/GALLERY.md b/GALLERY.md
index 01152c21..94bbac28 100644
--- a/GALLERY.md
+++ b/GALLERY.md
@@ -12,6 +12,7 @@ This gallery shows examples from all available datasets using their default conf
- [calendar_arithmetic](#calendar_arithmetic)
- [chain_sum](#chain_sum)
- [color_cube_rotation](#color_cube_rotation)
+- [complex_arithmetic](#complex_arithmetic)
- [countdown](#countdown)
- [course_schedule](#course_schedule)
- [family_relationships](#family_relationships)
@@ -19,8 +20,10 @@ This gallery shows examples from all available datasets using their default conf
- [fraction_simplification](#fraction_simplification)
- [game_of_life](#game_of_life)
- [gcd](#gcd)
+- [group_anagrams](#group_anagrams)
- [gsm_symbolic](#gsm_symbolic)
- [intermediate_integration](#intermediate_integration)
+- [isomorphic_strings](#isomorphic_strings)
- [largest_island](#largest_island)
- [lcm](#lcm)
- [leg_counting](#leg_counting)
@@ -34,19 +37,23 @@ This gallery shows examples from all available datasets using their default conf
- [number_sorting](#number_sorting)
- [palindrome](#palindrome)
- [polynomial_equations](#polynomial_equations)
+- [polynomial_multiplication](#polynomial_multiplication)
- [prime_factorization](#prime_factorization)
- [propositional_logic](#propositional_logic)
- [quantum_lock](#quantum_lock)
- [rubiks_cube](#rubiks_cube)
+- [self_reference](#self_reference)
- [sentence_reordering](#sentence_reordering)
- [simple_equations](#simple_equations)
- [simple_geometry](#simple_geometry)
- [simple_integration](#simple_integration)
+- [sokoban](#sokoban)
- [spell_backward](#spell_backward)
- [sudoku](#sudoku)
- [syllogism](#syllogism)
- [time_intervals](#time_intervals)
- [tower_of_hanoi](#tower_of_hanoi)
+- [tsumego](#tsumego)
- [word_ladder](#word_ladder)
- [group_anagrams](#group_anagrams)
- [spiral_matrix](#spiral_matrix)
@@ -407,17 +414,17 @@ Example tasks:
Example 1:
Question: 4 + 3 =
Answer: 7
-Metadata: {'num_terms': 2, 'num_digits': 1, 'expression': '4 + 3'}
+Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 1}, 'expression': '4 + 3'}
Example 2:
Question: 812 + 880 =
Answer: 1692
-Metadata: {'num_terms': 2, 'num_digits': 3, 'expression': '812 + 880'}
+Metadata: {'difficulty': {'num_terms': 2, 'num_digits': 3}, 'expression': '812 + 880'}
Example 3:
Question: 2 + 6 + 3 + 4 + 0 =
Answer: 15
-Metadata: {'num_terms': 5, 'num_digits': 1, 'expression': '2 + 6 + 3 + 4 + 0'}
+Metadata: {'difficulty': {'num_terms': 5, 'num_digits': 1}, 'expression': '2 + 6 + 3 + 4 + 0'}
````
@@ -489,6 +496,39 @@ Metadata: {'initial_state': {'top': 'orange', 'right': 'cyan', 'front': 'violet'
````
+### complex_arithmetic
+Generates complex number arithmetic problems.
+
+Default configuration:
+```python
+min_real = -10
+max_real = 10
+min_imag = -10
+max_imag = 10
+operations = ('+', '-', '*', '/')
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: Add the complex numbers: (-10.0 - 2.0i) + (-3.0 - 3.0i)
+Answer: -13.0 - 5.0i
+Metadata: {'num1': (-10.0, -2.0), 'num2': (-3.0, -3.0), 'operation': '+', 'result': (-13, -5)}
+
+Example 2:
+Question: Add the complex numbers: (-1.0 - 6.0i) + (4.0 + 1.0i)
+Answer: 3.0 - 5.0i
+Metadata: {'num1': (-1.0, -6.0), 'num2': (4.0, 1.0), 'operation': '+', 'result': (3, -5)}
+
+Example 3:
+Question: Divide the complex numbers: (-7.0 - 79.0i) ÷ (-7.0 - 5.0i)
+Answer: 6.0 + 7.0i
+Metadata: {'num1': (-7.0, -79.0), 'num2': (-7.0, -5.0), 'operation': '/', 'result': (6, 7)}
+
+````
+
### countdown
Generates Countdown Number Game tasks
@@ -898,6 +938,75 @@ Metadata: {'numbers': [297, 30], 'result': 3}
````
+### group_anagrams
+Generates Group Anagrams exercises with configurable difficulty
+
+Default configuration:
+```python
+anagram_groups = 10
+max_words_per_group = 5
+size = 500
+seed = 42
+```
+
+Example tasks:
+````
+Example 1:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+ - There is no string in the input that can be rearranged to form "bat".
+ - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["tinglers", "argonon", "ditas", "palinodist", "merocyte", "conterminal", "canny", "nancy", "outasight", "autosight", "oversauciness", "applauders", "suprapedal"]
+
+Answer: [["applauders", "suprapedal"], ["argonon"], ["autosight", "outasight"], ["canny", "nancy"], ["conterminal"], ["ditas"], ["merocyte"], ["oversauciness"], ["palinodist"], ["tinglers"]]
+Metadata: {'words': ['tinglers', 'argonon', 'ditas', 'palinodist', 'merocyte', 'conterminal', 'canny', 'nancy', 'outasight', 'autosight', 'oversauciness', 'applauders', 'suprapedal'], 'solution': [['applauders', 'suprapedal'], ['argonon'], ['autosight', 'outasight'], ['canny', 'nancy'], ['conterminal'], ['ditas'], ['merocyte'], ['oversauciness'], ['palinodist'], ['tinglers']]}
+
+Example 2:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+ - There is no string in the input that can be rearranged to form "bat".
+ - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["regear", "escrod", "coders", "decors", "credos", "scored", "semitaur", "muriates", "peripterous", "zanies", "expatiater", "wooled", "meningomyelocele", "myelomeningocele", "vainest", "natives", "naivest", "preludes", "repulsed"]
+
+Answer: [["coders", "credos", "decors", "escrod", "scored"], ["expatiater"], ["meningomyelocele", "myelomeningocele"], ["muriates", "semitaur"], ["naivest", "natives", "vainest"], ["peripterous"], ["preludes", "repulsed"], ["regear"], ["wooled"], ["zanies"]]
+Metadata: {'words': ['regear', 'escrod', 'coders', 'decors', 'credos', 'scored', 'semitaur', 'muriates', 'peripterous', 'zanies', 'expatiater', 'wooled', 'meningomyelocele', 'myelomeningocele', 'vainest', 'natives', 'naivest', 'preludes', 'repulsed'], 'solution': [['coders', 'credos', 'decors', 'escrod', 'scored'], ['expatiater'], ['meningomyelocele', 'myelomeningocele'], ['muriates', 'semitaur'], ['naivest', 'natives', 'vainest'], ['peripterous'], ['preludes', 'repulsed'], ['regear'], ['wooled'], ['zanies']]}
+
+Example 3:
+Question: An anagram is a word formed by rearranging the letters of a different word, using all the original letters exactly once.
+
+Your job is to group the anagrams together. You can return the answer in any order.
+
+Example:
+Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
+Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
+Explanation:
+ - There is no string in the input that can be rearranged to form "bat".
+ - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+
+Group the following list of words into anagrams:
+["eagerest", "granitite", "helium", "nizam", "nazim", "striplings", "slipstring", "rearrest", "arrester", "bf", "tadpolism", "canun", "cunan", "isotonic"]
+
+Answer: [["arrester", "rearrest"], ["bf"], ["canun", "cunan"], ["eagerest"], ["granitite"], ["helium"], ["isotonic"], ["nazim", "nizam"], ["slipstring", "striplings"], ["tadpolism"]]
+Metadata: {'words': ['eagerest', 'granitite', 'helium', 'nizam', 'nazim', 'striplings', 'slipstring', 'rearrest', 'arrester', 'bf', 'tadpolism', 'canun', 'cunan', 'isotonic'], 'solution': [['arrester', 'rearrest'], ['bf'], ['canun', 'cunan'], ['eagerest'], ['granitite'], ['helium'], ['isotonic'], ['nazim', 'nizam'], ['slipstring', 'striplings'], ['tadpolism']]}
+
+````
+
### gsm_symbolic
Default configuration:
```python
@@ -967,6 +1076,99 @@ Metadata: {'integrand': '2*asin(x)', 'problem_type': 'by_parts', 'variable': 'x'
````
+### isomorphic_strings
+Generates Isomorphic Strings exercises with configurable difficulty
+
+Default configuration:
+```python
+max_string_length = 10
+p_solvable = 0.5
+size = 500
+seed = 42
+```
+
+Example tasks:
+````
+Example 1:
+Question: Two strings are isomorphic if the characters in one string can be replaced to get the second string.
+
+All occurrences of a character must be replaced with another character while preserving the order of characters.
+
+No two characters may map to the same character, but a character may map to itself.
+
+Example 1:
+Input: egg add
+Output: True
+Explanation: The strings s and t can be made identical by:
+ - Mapping 'e' to 'a'.
+ - Mapping 'g' to 'd'.
+
+Example 2:
+Input: foo bar
+Output: False
+Explanation:
+ - The strings cannot be made identical as 'o' needs to be mapped to both 'a' and 'r'.
+
+Return True if the following two strings are isomorphic, or False otherwise:
+cc bw
+
+Answer: False
+Metadata: {'words': ['cc', 'bw'], 'solution': False, 'solvable': False}
+
+Example 2:
+Question: Two strings are isomorphic if the characters in one string can be replaced to get the second string.
+
+All occurrences of a character must be replaced with another character while preserving the order of characters.
+
+No two characters may map to the same character, but a character may map to itself.
+
+Example 1:
+Input: egg add
+Output: True
+Explanation: The strings s and t can be made identical by:
+ - Mapping 'e' to 'a'.
+ - Mapping 'g' to 'd'.
+
+Example 2:
+Input: foo bar
+Output: False
+Explanation:
+ - The strings cannot be made identical as 'o' needs to be mapped to both 'a' and 'r'.
+
+Return True if the following two strings are isomorphic, or False otherwise:
+nai oik
+
+Answer: True
+Metadata: {'words': ['nai', 'oik'], 'solution': True, 'solvable': True}
+
+Example 3:
+Question: Two strings are isomorphic if the characters in one string can be replaced to get the second string.
+
+All occurrences of a character must be replaced with another character while preserving the order of characters.
+
+No two characters may map to the same character, but a character may map to itself.
+
+Example 1:
+Input: egg add
+Output: True
+Explanation: The strings s and t can be made identical by:
+ - Mapping 'e' to 'a'.
+ - Mapping 'g' to 'd'.
+
+Example 2:
+Input: foo bar
+Output: False
+Explanation:
+ - The strings cannot be made identical as 'o' needs to be mapped to both 'a' and 'r'.
+
+Return True if the following two strings are isomorphic, or False otherwise:
+hogtytyof kgqwfwfgh
+
+Answer: True
+Metadata: {'words': ['hogtytyof', 'kgqwfwfgh'], 'solution': True, 'solvable': True}
+
+````
+
### largest_island
Generates Largest Island exercises with configurable difficulty
@@ -1102,17 +1304,17 @@ Example tasks:
Example 1:
Question: How many legs are there in total if you have 1 sea slug, 1 deer?
Answer: 4
-Metadata: {'animals': {'sea slug': 1, 'deer': 1}, 'total_legs': 4}
+Metadata: {'difficulty': {'num_animals': 2}, 'animals': {'sea slug': 1, 'deer': 1}, 'total_legs': 4}
Example 2:
Question: How many legs are there in total if you have 2 sheeps, 2 dogs?
Answer: 16
-Metadata: {'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16}
+Metadata: {'difficulty': {'num_animals': 2}, 'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16}
Example 3:
Question: How many legs are there in total if you have 1 crab, 2 lobsters, 1 human, 1 cow, 1 bee?
Answer: 42
-Metadata: {'animals': {'crab': 1, 'lobster': 2, 'human': 1, 'cow': 1, 'bee': 1}, 'total_legs': 42}
+Metadata: {'difficulty': {'num_animals': 5}, 'animals': {'crab': 1, 'lobster': 2, 'human': 1, 'cow': 1, 'bee': 1}, 'total_legs': 42}
````
@@ -1590,6 +1792,46 @@ Metadata: {'polynomial_expr': '71*n**3 - 2*n - 29', 'variable': 'n', 'degree': 3
````
+### polynomial_multiplication
+Generates [min_polynomials, max_polynomials] random polynomials of degree in [min_degree, max_degree].
+ - The polynomial is formed by summing random terms of the form: coeff * x^exponent.
+ - Then we find "F = P_0 * ... * P_n" using Sympy.
+
+Default configuration:
+```python
+min_terms = 2
+max_terms = 4
+min_value = 1
+max_value = 100
+min_degree = 1
+max_degree = 3
+min_polynomials = 2
+max_polynomials = 3
+single_variable = (True,)
+operators = ('+', '-')
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: Calculate the following: (65*x - 72)*(105*x - 125)
+Answer: 6825*x**2 - 15685*x + 9000
+Metadata: {'polynomial_expr': '(65*x - 72)*(105*x - 125)', 'single_variable': (True,), 'result': '6825*x**2 - 15685*x + 9000'}
+
+Example 2:
+Question: Calculate the following: (-9*x**2 - 28*x)*(86*x**2 - 2*x - 13)
+Answer: -774*x**4 - 2390*x**3 + 173*x**2 + 364*x
+Metadata: {'polynomial_expr': '(-9*x**2 - 28*x)*(86*x**2 - 2*x - 13)', 'single_variable': (True,), 'result': '-774*x**4 - 2390*x**3 + 173*x**2 + 364*x'}
+
+Example 3:
+Question: Calculate the following: (43 - 91*x)*(3*x**2 - 10*x)*(71*x**3 - 2*x - 29)
+Answer: -19383*x**6 + 73769*x**5 - 29984*x**4 + 5839*x**3 - 29271*x**2 + 12470*x
+Metadata: {'polynomial_expr': '(43 - 91*x)*(3*x**2 - 10*x)*(71*x**3 - 2*x - 29)', 'single_variable': (True,), 'result': '-19383*x**6 + 73769*x**5 - 29984*x**4 + 5839*x**3 - 29271*x**2 + 12470*x'}
+
+````
+
### prime_factorization
Generates prime factorization tasks
@@ -1788,6 +2030,56 @@ Metadata: {'cube_size': 3, 'scramble_steps': 3, 'scramble_moves': "U R' R'", 'ex
````
+### self_reference
+Generates self-referential puzzles
+
+Default configuration:
+```python
+difficulty = 5
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: Given the truthfulness of these statements, please tell me the number of possible solutions:
+ - Statement 1: 'At least 1 of these 7 statements are true.'
+ - Statement 2: 'At most 3 of these 7 statements are false.'
+ - Statement 3: 'Exactly 4 of these 7 statements are true.'
+ - Statement 4: 'Exactly 3 of these 7 statements are false.'
+ - Statement 5: 'Either Statement 3 or Statement 4 is true, but not both.'
+ - Statement 6: 'The number of true statements is a prime number.'
+ - Statement 7: 'The number of false statements is a composite number.'
+
+Answer: 4
+
+Example 2:
+Question: Given the truthfulness of these statements, please tell me the number of possible solutions:
+ - Statement 1: 'At least 4 of these 7 statements are true.'
+ - Statement 2: 'At most 5 of these 7 statements are false.'
+ - Statement 3: 'Exactly 7 of these 7 statements are true.'
+ - Statement 4: 'Exactly 1 of these 7 statements are false.'
+ - Statement 5: 'Either Statement 3 or Statement 4 is true, but not both.'
+ - Statement 6: 'The number of true statements is a prime number.'
+ - Statement 7: 'The number of false statements is a composite number.'
+
+Answer: 4
+
+Example 3:
+Question: Given the truthfulness of these statements, please tell me the number of possible solutions:
+ - Statement 1: 'At least 2 of these 7 statements are true.'
+ - Statement 2: 'At most 5 of these 7 statements are false.'
+ - Statement 3: 'Exactly 0 of these 7 statements are true.'
+ - Statement 4: 'Exactly 3 of these 7 statements are false.'
+ - Statement 5: 'Either Statement 3 or Statement 4 is true, but not both.'
+ - Statement 6: 'The number of true statements is a prime number.'
+ - Statement 7: 'The number of false statements is a composite number.'
+
+Answer: 2
+
+````
+
### sentence_reordering
Generates sentence reordering tasks from text spans
@@ -1924,6 +2216,107 @@ Metadata: {'integrand': '-28*X**3 + 8*X', 'variable': 'X', 'expected_answer_expr
````
+### sokoban
+Generates Sokoban games with configurable parameters
+
+Default configuration:
+```python
+seed = 42
+size = 500
+min_w = 6
+min_h = 6
+max_w = 10
+max_h = 10
+min_boxes = 6
+max_boxes = 10
+```
+
+Example tasks:
+````
+Example 1:
+Question: You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
++ + + + + + + + +
++ + X - @ * @ X +
++ + + - - @ - + +
++ + + - - - X $ +
++ + + + - + + + +
++ + $ + + + + + +
++ + + + + + + + +
+
+
+Answer: RLDULLRRDLDR
+Metadata: {'gamestr': '+ + + + + + + + + \n+ + X - @ * @ X + \n+ + + - - @ - + + \n+ + + - - - X $ + \n+ + + + - + + + + \n+ + $ + + + + + + \n+ + + + + + + + + \n\n', 'difficulty': {'size': (7, 9), 'num_steps': 12}}
+
+Example 2:
+Question: You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
++ + + + + +
++ - * - - +
++ @ - - @ +
++ X - @ - +
++ - - - X +
++ X - @ X +
++ - - - - +
++ + + + + +
+
+
+Answer: LDRRDRDDLLURURDULUURDD
+Metadata: {'gamestr': '+ + + + + + \n+ - * - - + \n+ @ - - @ + \n+ X - @ - + \n+ - - - X + \n+ X - @ X + \n+ - - - - + \n+ + + + + + \n\n', 'difficulty': {'size': (8, 6), 'num_steps': 22}}
+
+Example 3:
+Question: You are going to solve a 'sokoban' puzzle.
+
+* - The player
+% - The player on a goal
+@ - A box
+X - A goal
+$ - A box on a goal
++ - A wall
+- - An empty position
+
+Your solution must be a string of characters, ex: LDURRUDL.
+
+Here is your puzzle:
++ + + + + + + + + + + +
++ - $ - X + - - - - - +
++ - @ - - - - - @ - X +
++ - * - @ - - X - $ - +
++ - - - - X + - - - - +
++ + - - - - + $ - @ - +
++ + + - - - - - - - - +
++ + + - - - $ - - - - +
++ + + + - - - - - - - +
++ + + + + + + + + + + +
+
+
+Answer: RRRRURRRLDDRRDLULDRDLLLLULLDRDRUULUUULDLLURRDRU
+Metadata: {'gamestr': '+ + + + + + + + + + + + \n+ - $ - X + - - - - - + \n+ - @ - - - - - @ - X + \n+ - * - @ - - X - $ - + \n+ - - - - X + - - - - + \n+ + - - - - + $ - @ - + \n+ + + - - - - - - - - + \n+ + + - - - $ - - - - + \n+ + + + - - - - - - - + \n+ + + + + + + + + + + + \n\n', 'difficulty': {'size': (10, 12), 'num_steps': 47}}
+
+````
+
### spell_backward
Generates tasks to spell words backward
@@ -2039,12 +2432,10 @@ Generates syllogism reasoning tasks
Default configuration:
```python
-terms = None
allow_all = True
allow_no = True
allow_some = True
allow_some_not = True
-include_invalid = True
invalid_ratio = 0.3
seed = 42
size = 500
@@ -2055,24 +2446,24 @@ Example tasks:
Example 1:
Question: Consider these statements:
1. No students are humans
-2. No humans are chefs
+2. All humans are chefs
Does it logically follow that:
-No students are chefs?
+All students are chefs?
(Answer Yes or No)
-Answer: Yes
-Metadata: {'premise1': 'No students are humans', 'premise2': 'No humans are chefs', 'conclusion': 'No students are chefs', 'is_valid': True}
+Answer: No
+Metadata: {'premise1': 'No students are humans', 'premise2': 'All humans are chefs', 'conclusion': 'All students are chefs', 'is_valid': False}
Example 2:
Question: Consider these statements:
-1. Some children are not animals
-2. Some animals are doctors
+1. All children are animals
+2. No animals are doctors
Does it logically follow that:
-All children are doctors?
+Some children are not doctors?
(Answer Yes or No)
Answer: Yes
-Metadata: {'premise1': 'Some children are not animals', 'premise2': 'Some animals are doctors', 'conclusion': 'All children are doctors', 'is_valid': True}
+Metadata: {'premise1': 'All children are animals', 'premise2': 'No animals are doctors', 'conclusion': 'Some children are not doctors', 'is_valid': True}
Example 3:
Question: Consider these statements:
@@ -2082,8 +2473,8 @@ Question: Consider these statements:
Does it logically follow that:
Some butterflies are not whales?
(Answer Yes or No)
-Answer: No
-Metadata: {'premise1': 'All butterflies are tigers', 'premise2': 'No tigers are whales', 'conclusion': 'Some butterflies are not whales', 'is_valid': False}
+Answer: Yes
+Metadata: {'premise1': 'All butterflies are tigers', 'premise2': 'No tigers are whales', 'conclusion': 'Some butterflies are not whales', 'is_valid': True}
````
@@ -2113,7 +2504,7 @@ Metadata: {'task_type': 'datetime_tz', 'start_time': datetime.datetime(2964, 6,
Example 2:
Question: A video call started at 09:44 and ended at 12:22. How long was the call? Answer in HH:MM.
Answer: 02:38
-Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 5, 9, 44), 'end_time': datetime.datetime(2025, 2, 5, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
+Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 7, 9, 44), 'end_time': datetime.datetime(2025, 2, 7, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
Example 3:
Question: Calculate the time difference between Sat Dec 22 2677 and Thu Mar 21 2678. Express the result in D days.
@@ -2186,6 +2577,96 @@ Metadata: {'num_disks': 6, 'num_pegs': 3, 'start_peg': 1, 'target_peg': 2, 'auxi
````
+### tsumego
+Generates (one-move) Tsumego problems with configurable parameters
+
+Default configuration:
+```python
+min_board_size = 9
+max_board_size = 13
+max_stones = 15
+size = 10
+seed = 42
+```
+
+Example tasks:
+````
+Example 1:
+Question: I have a Go problem for you. Black moves next - can you capture some of the white stones?
+
+ A B C D E F G H I
+ 9 X . . . X . . . .
+ 8 . . . . . . . . .
+ 7 . O . O . . X . .
+ 6 . . . X . . . . O
+ 5 O . X O X . . . .
+ 4 . X O O . O . . .
+ 3 . . X O X . . . .
+ 2 . . . X . . . . .
+ 1 . O . O . . X . .
+
+X - Black
+O - White
+
+Specify your move in coordinates (e.g. 'C4' for column C, row 4)
+Answer: E4
+
+Metadata: {'difficulty': {'board_size': 9}, 'board': [['X', '.', '.', '.', 'X', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', 'O', '.', 'O', '.', '.', 'X', '.', '.'], ['.', '.', '.', 'X', '.', '.', '.', '.', 'O'], ['O', '.', 'X', 'O', 'X', '.', '.', '.', '.'], ['.', 'X', 'O', 'O', '.', 'O', '.', '.', '.'], ['.', '.', 'X', 'O', 'X', '.', '.', '.', '.'], ['.', '.', '.', 'X', '.', '.', '.', '.', '.'], ['.', 'O', '.', 'O', '.', '.', 'X', '.', '.']], 'solution': 'E4'}
+
+--------------------------------------------------
+
+Example 2:
+Question: Here's a Go challenge. Playing as Black, how can you capture as many white stones as possible?
+
+ A B C D E F G H I
+ 9 . . O . . . . . .
+ 8 . X O . . . . . .
+ 7 X . X . . . . . .
+ 6 O O O X . . . . .
+ 5 X O O . . . . . .
+ 4 . X . . . . . . O
+ 3 . X . . . . X . .
+ 2 O . O . . . . . .
+ 1 . . . . O . . . .
+
+X - Black
+O - White
+
+Specify your move in coordinates (e.g. 'C4' for column C, row 4)
+Answer: B7
+
+Metadata: {'difficulty': {'board_size': 9}, 'board': [['.', '.', 'O', '.', '.', '.', '.', '.', '.'], ['.', 'X', 'O', '.', '.', '.', '.', '.', '.'], ['X', '.', 'X', '.', '.', '.', '.', '.', '.'], ['O', 'O', 'O', 'X', '.', '.', '.', '.', '.'], ['X', 'O', 'O', '.', '.', '.', '.', '.', '.'], ['.', 'X', '.', '.', '.', '.', '.', '.', 'O'], ['.', 'X', '.', '.', '.', '.', 'X', '.', '.'], ['O', '.', 'O', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', 'O', '.', '.', '.', '.']], 'solution': 'B7'}
+
+--------------------------------------------------
+
+Example 3:
+Question: Tsumego time. Black to play and capture some stones.
+Find the key move.
+
+ A B C D E F G H I J K L
+12 . . . . . . . . . . . .
+11 . . X . . . . . . . . .
+10 . . . . . . . . . . . .
+ 9 . . . . . . . . . . . .
+ 8 X . . . . X . . . X . .
+ 7 . X . . . . . . . . . .
+ 6 . O X X . . . . . . . O
+ 5 . X O O X . . . . . . .
+ 4 . O O . . . . . O . . O
+ 3 X . X . . . . . . . . .
+ 2 . . . . . . . . . . . .
+ 1 . . . . . . . . . . X .
+
+X - Black
+O - White
+
+Specify your move in coordinates (e.g. 'C4' for column C, row 4)
+Answer: D4
+
+Metadata: {'difficulty': {'board_size': 12}, 'board': [['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', 'X', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['X', '.', '.', '.', '.', 'X', '.', '.', '.', 'X', '.', '.'], ['.', 'X', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', 'O', 'X', 'X', '.', '.', '.', '.', '.', '.', '.', 'O'], ['.', 'X', 'O', 'O', 'X', '.', '.', '.', '.', '.', '.', '.'], ['.', 'O', 'O', '.', '.', '.', '.', '.', 'O', '.', '.', 'O'], ['X', '.', 'X', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', 'X', '.']], 'solution': 'D4'}
+
+````
+
### word_ladder
Generates word ladder transformation tasks
diff --git a/README.md b/README.md
index f177c0bf..8c8c8199 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@ metadata: {'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16}
...
```
-See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets with examples.
+See the [Dataset Gallery](https://github.com/open-thought/reasoning-gym/blob/main/GALLERY.md) for a complete list of available datasets with examples.
## Task Overview
@@ -72,6 +72,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets
- `SimpleEquationsDataset`: Generate linear equations with one variable to solve (e.g. "3\*x + 2 = 14")
- `PolynomialEquationsDataset`: Generate polynomial equations with one variable to solve (e.g. "-6*h\*\*4 + 4*h\**2 - 5*h = 0")
+- `PolynomialMultiplicationDataset`: Generate polynomial multiplications (e.g. "(8x^3 + x + 2)\*(y - 3)")
### Arithmetic Tasks
@@ -100,6 +101,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets
- `WordLadderDataset`: Generate word ladder puzzles where one word is transformed into another by changing one letter at a time
- `GroupAnagramsDataset`: Group anagrams together in a list of words
- `SprialMatrixDataset`: Print elements of a matrix in spiral order
+- `IsomorphicStringsDataset`: Check if two strings are isomorphic (have the same character mapping)
### Code Tasks
@@ -118,6 +120,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets
- `SyllogismDataset`: Generates a [syllogism](https://en.wikipedia.org/wiki/Syllogism) reasoning dataset
- `AliceInWonderlandDataset`: Generates [AIW](https://openreview.net/forum?id=Mkl7dzjYiW) (Alice In Wonderland) problems with a few variations
- `ZebraDataset`: Generates [Zebra Puzzles](https://en.wikipedia.org/wiki/Zebra_Puzzle) of varying difficulty.
+- `SelfReferenceDataset`: Generates self-referencing logic puzzles.
### Graph Tasks
@@ -129,10 +132,12 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets
### Game Tasks
- `SudokuDataset`: Generate 9x9 Sudoku puzzles with configurable number of empty cells
+- `SokobanDataset`: Generate [Sokoban](https://en.wikipedia.org/wiki/Sokoban) puzzles with configurable size and detail.
- `MiniSudokuDataset`: Generate 4x4 Mini Sudoku puzzles with configurable difficulty
- `MazeDataset`: Generate a maze with a start and a goal
- `CountdownDataset`: Generate number game tasks where numbers and operators must be combined to reach a target value
- `NQueensDataset`: Generate N-Queens puzzles with configurable board size and number of starting queens
+- `TsumegoDataset`: Generate Tsumego capture puzzles with variable board sizes and stone placements
## Future Generator Ideas
diff --git a/pyproject.toml b/pyproject.toml
index c3cc31b7..794077d3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "reasoning_gym"
-version = "0.1.3"
+version = "0.1.5"
authors = [
{ name = "Open-Thought community", email = "andreas.koepf@xamla.com" },
]
@@ -31,20 +31,20 @@ license = "Apache-2.0"
license-files = ["LICENSE*"]
[project.optional-dependencies]
-test = [
- "pytest>=7.0.0",
- "pytest-cov>=4.0.0",
-]
+test = ["pytest>=7.0.0", "pytest-cov>=4.0.0"]
[project.urls]
"Homepage" = "https://github.com/open-thought/reasoning-gym"
"Bug Tracker" = "https://github.com/open-thought/reasoning-gym/issues"
-[tool.hatch.build.targets.wheel]
-packages = ["reasoning_gym"]
[tool.hatch.build]
-include = ["reasoning_gym/**/*.txt"]
+packages = ["reasoning_gym"]
+include = [
+ "reasoning_gym/**/*.py",
+ "reasoning_gym/**/*.txt",
+ "reasoning_gym/**/levels/*",
+]
[tool.black]
line-length = 120
@@ -58,6 +58,4 @@ line_length = 120
[tool.pytest.ini_options]
addopts = "-ra -q"
-testpaths = [
- "tests",
-]
+testpaths = ["tests"]
diff --git a/reasoning_gym/__init__.py b/reasoning_gym/__init__.py
index 019873ff..ecca7f3f 100644
--- a/reasoning_gym/__init__.py
+++ b/reasoning_gym/__init__.py
@@ -5,7 +5,7 @@ Reasoning Gym - A library of procedural dataset generators for training reasonin
from . import algebra, algorithmic, arithmetic, code, cognition, data, games, geometry, graphs, logic
from .factory import create_dataset, register_dataset
-__version__ = "0.1.3"
+__version__ = "0.1.5"
__all__ = [
"algebra",
"algorithmic",
diff --git a/reasoning_gym/algebra/__init__.py b/reasoning_gym/algebra/__init__.py
index fc7a867a..fc77b977 100644
--- a/reasoning_gym/algebra/__init__.py
+++ b/reasoning_gym/algebra/__init__.py
@@ -1,9 +1,13 @@
+from .complex_arithmetic import ComplexArithmeticConfig, ComplexArithmeticDataset
from .intermediate_integration import IntermediateIntegrationConfig, IntermediateIntegrationDataset
from .polynomial_equations import PolynomialEquationsConfig, PolynomialEquationsDataset
+from .polynomial_multiplication import PolynomialMultiplicationConfig, PolynomialMultiplicationDataset
from .simple_equations import SimpleEquationsConfig, SimpleEquationsDataset
from .simple_integration import SimpleIntegrationConfig, SimpleIntegrationDataset
__all__ = [
+ "ComplexArithmeticConfig",
+ "ComplexArithmeticDataset",
"IntermediateIntegrationConfig",
"IntermediateIntegrationDataset",
"PolynomialEquationsConfig",
@@ -12,4 +16,6 @@ __all__ = [
"SimpleEquationsConfig",
"SimpleIntegrationConfig",
"SimpleIntegrationDataset",
+ "PolynomialMultiplicationConfig",
+ "PolynomialMultiplicationDataset",
]
diff --git a/reasoning_gym/algebra/complex_arithmetic.py b/reasoning_gym/algebra/complex_arithmetic.py
new file mode 100644
index 00000000..7c749eaa
--- /dev/null
+++ b/reasoning_gym/algebra/complex_arithmetic.py
@@ -0,0 +1,147 @@
+import cmath
+import math
+import random
+from dataclasses import dataclass
+from typing import Optional, Tuple
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+@dataclass
+class ComplexArithmeticConfig:
+ min_real: int = -10
+ max_real: int = 10
+ min_imag: int = -10
+ max_imag: int = 10
+ operations: Tuple[str, ...] = ("+", "-", "*", "/")
+ seed: Optional[int] = None
+ size: int = 500
+
+ def validate(self) -> None:
+ """Validate configuration parameters."""
+ assert self.max_real >= self.min_real, "max_real must be >= min_real"
+ assert self.max_imag >= self.min_imag, "max_imag must be >= min_imag"
+ assert all(op in ("+", "-", "*", "/") for op in self.operations), "invalid operator"
+
+
+class ComplexArithmeticDataset(ProceduralDataset):
+ """Generates complex number arithmetic problems."""
+
+ def __init__(self, config: ComplexArithmeticConfig):
+ self._prompt_templates = {
+ "+": "Add the complex numbers: ({a}) + ({b})",
+ "-": "Subtract the complex numbers: ({a}) - ({b})",
+ "*": "Multiply the complex numbers: ({a}) × ({b})",
+ "/": "Divide the complex numbers: ({a}) ÷ ({b})",
+ }
+ super().__init__(config=config, seed=config.seed, size=config.size)
+
+ def _generate_complex(self, rng: random.Random) -> complex:
+ """Generate a random complex number."""
+ real = rng.randint(self.config.min_real, self.config.max_real)
+ imag = rng.randint(self.config.min_imag, self.config.max_imag)
+ return complex(real, imag)
+
+    def _format_complex(self, z: complex) -> str:
+        """Format a complex number as text, always using 2 decimal places and 'i' as the unit."""
+        real, imag = z.real, z.imag
+        if abs(imag) < 1e-10:  # purely real: omit the imaginary part
+            return f"{real:.2f}"
+        elif abs(real) < 1e-10:  # purely imaginary: omit the real part
+            return f"{imag:.2f}i"
+        else:
+            sign = "+" if imag >= 0 else "-"
+            return f"{real:.2f} {sign} {abs(imag):.2f}i"
+
+ def __getitem__(self, idx: int) -> dict:
+ rng = random.Random(self.seed + idx)
+
+ # Choose random operation
+ op = rng.choice(self.config.operations)
+
+ if op == "/":
+ # For division, first generate the quotient (a) and divisor (b)
+ # Then calculate the dividend (result) as a * b
+ a = self._generate_complex(rng) # This will be the final result
+ b = self._generate_complex(rng)
+ while b == 0: # Ensure non-zero divisor
+ b = self._generate_complex(rng)
+ result = a # Store the intended result
+ a = result * b # Calculate dividend to ensure whole number division
+ else:
+ # For other operations, generate numbers normally
+ a = self._generate_complex(rng)
+ b = self._generate_complex(rng)
+
+ # Calculate result
+ if op == "+":
+ result = a + b
+ elif op == "-":
+ result = a - b
+ else: # op == "*"
+ result = a * b
+
+ question = self._prompt_templates[op].format(a=self._format_complex(a), b=self._format_complex(b))
+
+ return {
+ "question": question,
+ "answer": self._format_complex(result),
+ "metadata": {
+ "num1": (a.real, a.imag),
+ "num2": (b.real, b.imag),
+ "operation": op,
+ "result": (int(result.real), int(result.imag)), # Convert to int since we ensure whole numbers
+ },
+ }
+
+    @staticmethod
+    def parse_string_to_complex(answer: str) -> Optional[complex]:
+        """Parse a textual answer into a complex number.
+
+        Spaces and letter case are ignored, and the mathematical unit "i" is accepted in
+        place of Python's "j". Examples: "3 + 4i" -> 3+4j, "3i" -> 3j, "-i" -> -1j, "7" -> 7+0j.
+
+        Args:
+            answer: The answer text to parse.
+
+        Returns:
+            The parsed complex number, or None when the text is not a valid complex literal.
+        """
+        try:
+            # Normalize the answer string by removing spaces and converting to lowercase
+            answer = answer.replace(" ", "").lower()
+            # Convert mathematical notation 'i' to Python's 'j' for complex numbers
+            answer = answer.replace("i", "j")
+
+            # Handle real numbers (no imaginary part)
+            if "j" not in answer:
+                student_result = complex(float(answer))
+            else:
+                # complex() already understands an implicit unit coefficient ("j", "-j",
+                # "3+j") as well as a purely imaginary value ("3j", "-2.5j"). A purely
+                # imaginary answer must keep its coefficient: "3j" is 0+3j, not 3+1j.
+                student_result = complex(answer)
+
+        except ValueError:
+            return None
+
+        return student_result
+
+    def score_answer(self, answer: str, metadata: dict) -> float:
+        """Score the answer as exp(-distance) between the parsed answer and the expected result."""
+        if answer is None:
+            return 0.0
+
+        try:
+            student_result = self.parse_string_to_complex(answer)
+            expected_result = complex(*metadata["result"])
+            # Calculate distance-based score using exponential decay
+            distance = abs(student_result - expected_result)
+            score = min(1.0, math.exp(-distance))  # 1.0 for an exact match, decaying towards 0
+            return score
+
+        except (ValueError, TypeError):  # TypeError: parse_string_to_complex returned None
+            return 0.0
+
+
+register_dataset("complex_arithmetic", ComplexArithmeticDataset, ComplexArithmeticConfig)
diff --git a/reasoning_gym/algebra/polynomial_multiplication.py b/reasoning_gym/algebra/polynomial_multiplication.py
new file mode 100644
index 00000000..9bcadc66
--- /dev/null
+++ b/reasoning_gym/algebra/polynomial_multiplication.py
@@ -0,0 +1,161 @@
+import random
+import string
+from dataclasses import dataclass
+from typing import Any, Dict, Optional, Tuple
+
+import sympy as sp
+from sympy import Eq, Symbol, expand, solve
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+@dataclass
+class PolynomialMultiplicationConfig:
+    """
+    Configuration for polynomial multiplication task generation.
+    """
+
+    min_terms: int = 2  # Minimum number of polynomial terms
+    max_terms: int = 4  # Maximum number of polynomial terms
+    min_value: int = 1  # Minimum value for coefficients
+    max_value: int = 100  # Maximum value for coefficients
+    min_degree: int = 1  # Minimum polynomial degree
+    max_degree: int = 3  # Maximum polynomial degree
+    min_polynomials: int = 2  # Minimum number of polynomials being multiplied
+    max_polynomials: int = 3  # Maximum number of polynomials being multiplied
+    single_variable: bool = True  # True: every polynomial uses "x"; False: a random lowercase letter each
+    operators: Tuple[str, ...] = (
+        "+",
+        "-",
+    )  # Allowed operators between terms; avoid adding '*' or '/' because they would affect the degree
+    seed: Optional[int] = None
+    size: int = 500
+
+    def validate(self) -> None:
+        """Validate configuration parameters."""
+        assert self.min_terms > 0, "min_terms must be positive."
+        assert self.max_terms >= self.min_terms, "max_terms must be >= min_terms."
+
+        assert self.min_value > 0, "min_value must be positive."
+        assert self.max_value >= self.min_value, "max_value must be >= min_value."
+
+        assert self.min_degree >= 1, "min_degree must be >= 1."
+        assert self.max_degree >= self.min_degree, "max_degree must be >= min_degree."
+
+        assert self.min_polynomials >= 2, "min_polynomials must be >= 2."
+        assert self.max_polynomials >= self.min_polynomials, "max_polynomials must be >= min_polynomials."
+
+        allowed_ops = {"+", "-"}
+        assert len(self.operators) > 0, "operators tuple cannot be empty."
+        assert all(op in allowed_ops for op in self.operators), "Invalid operator found. Must be a subset of {+, -}."
+
+
+class PolynomialMultiplicationDataset(ProceduralDataset):
+ """
+ Generates [min_polynomials, max_polynomials] random polynomials of degree in [min_degree, max_degree].
+ - The polynomial is formed by summing random terms of the form: coeff * x^exponent.
+    - Then we compute the product "F = P_0 * ... * P_n" using Sympy.
+ """
+
+ def __init__(self, config: PolynomialMultiplicationConfig):
+ self._prompt_templates = [
+ "Simplify this expression: {polynomial_expr}",
+ "Calculate the following: {polynomial_expr}",
+ ]
+ super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def __getitem__(self, idx: int) -> dict:
+        """
+        Generate a single polynomial multiplication item.
+
+        Returns:
+            A dict with:
+                - question: str (e.g. "Simplify this expression: (x - 3)*(8*x**3 + x + 2)")
+                - answer: str (expanded product, e.g. "8*x**4 - 24*x**3 + x**2 - x - 6")
+                - metadata: dict with details (polynomial_expr, single_variable, result)
+        """
+        rng = random.Random(self.seed + idx)
+        number_polynomials = rng.randint(self.config.min_polynomials, self.config.max_polynomials)
+        polynomials = [self._generate_polynomial_expr(rng) for _ in range(number_polynomials)]
+
+        polynomial_expr = sp.prod(polynomials)
+        product = sp.expand(polynomial_expr)
+
+        return {
+            "question": rng.choice(self._prompt_templates).format(
+                polynomial_expr=polynomial_expr,
+            ),
+            "answer": str(product),  # plain str, consistent with metadata["result"]
+            "metadata": {
+                "polynomial_expr": str(polynomial_expr),
+                "single_variable": self.config.single_variable,
+                "result": str(product),
+            },
+        }
+
+ def _get_variable(self, rng: random.Random) -> str:
+ """Get a random lowercase variable name"""
+ if self.config.single_variable:
+ return "x"
+ return rng.choice(string.ascii_lowercase)
+
+ def _generate_polynomial_expr(self, rng: random.Random):
+ """
+ Randomly generate a polynomial expression of 'degree'.
+ We'll use the config parameters:
+ - min_terms, max_terms: how many total terms to combine
+ - min_value, max_value: range for coefficients
+ - operators: to decide sign flips or direct addition
+
+ Args:
+ rng: Random number generator
+
+ Returns:
+            A sympy expression for the generated polynomial
+ """
+ variable = self._get_variable(rng)
+ degree = rng.randint(self.config.min_degree, self.config.max_degree)
+
+ x = Symbol(variable)
+
+ # Choose the number of terms and their respective degrees
+ num_terms = rng.randint(self.config.min_terms, self.config.max_terms)
+ # Keep track of exponents, exponents can repeat or skip but we force the highest exponent
+ chosen_exponents = [degree]
+ # Fill the rest randomly in [0, degree]
+ for _ in range(num_terms - 1):
+ exp = rng.randint(0, degree)
+ chosen_exponents.append(exp)
+
+ # Now build the polynomial expression: sum_{term}( coeff * x^exponent ), with optional sign
+ polynomial_expr = 0
+ for exp in chosen_exponents:
+ coeff = rng.randint(self.config.min_value, self.config.max_value)
+ # If '-' in operators, we can randomly flip the sign
+ if "-" in self.config.operators and rng.random() < 0.5:
+ coeff = -coeff
+ term_expr = coeff * (x**exp)
+ polynomial_expr += term_expr
+
+ return polynomial_expr
+
+ def score_answer(self, answer: Optional[str], metadata: Dict[str, Any]) -> float:
+ reward = 0.0
+ if answer is not None:
+ try:
+ predicted_poly = sp.parse_expr(answer)
+ target_poly = sp.parse_expr(metadata["result"])
+
+ # Check if the difference simplifies to zero (i.e. they are equivalent).
+ if sp.simplify(predicted_poly - target_poly) == 0:
+ reward = 1.0
+ elif answer.strip():
+ reward = 0.05
+ else:
+ reward = 0.01
+ except Exception:
+ reward = 0.01
+ return reward
+
+
+register_dataset("polynomial_multiplication", PolynomialMultiplicationDataset, PolynomialMultiplicationConfig)
diff --git a/reasoning_gym/algorithmic/__init__.py b/reasoning_gym/algorithmic/__init__.py
index 4c799bcd..788a927d 100644
--- a/reasoning_gym/algorithmic/__init__.py
+++ b/reasoning_gym/algorithmic/__init__.py
@@ -9,6 +9,7 @@ Algorithmic tasks for training reasoning capabilities:
from .base_conversion import BaseConversionConfig, BaseConversionDataset
from .caesar_cipher import CaesarCipherConfig, CaesarCipherDataset
from .group_anagrams import GroupAnagramsConfig, GroupAnagramsDataset
+from .isomorphic_strings import IsomorphicStringsConfig, IsomorphicStringsDataset
from .letter_counting import LetterCountingConfig, LetterCountingDataset
from .letter_jumble import LetterJumbleConfig, LetterJumbleDataset
from .number_filtering import NumberFilteringConfig, NumberFilteringDataset
@@ -51,4 +52,6 @@ __all__ = [
"GroupAnagramsDataset",
"SpiralMatrixConfig",
"SpiralMatrixDataset",
+ "IsomorphicStringsConfig",
+ "IsomorphicStringsDataset",
]
diff --git a/reasoning_gym/algorithmic/isomorphic_strings.py b/reasoning_gym/algorithmic/isomorphic_strings.py
new file mode 100644
index 00000000..3b4a59e5
--- /dev/null
+++ b/reasoning_gym/algorithmic/isomorphic_strings.py
@@ -0,0 +1,121 @@
+"""Check if two strings are isomorphic.
+
+Two strings are isomorphic if the characters in one string can be replaced to get the second string.
+
+A popular Leetcode problem:
+https://leetcode.com/problems/isomorphic-strings/description/
+"""
+
+from dataclasses import dataclass
+from random import Random
+from typing import Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+QUESTION_TEMPLATE = """Two strings are isomorphic if the characters in one string can be replaced to get the second string.
+
+All occurrences of a character must be replaced with another character while preserving the order of characters.
+
+No two characters may map to the same character, but a character may map to itself.
+
+Example 1:
+Input: egg add
+Output: True
+Explanation: The strings s and t can be made identical by:
+ - Mapping 'e' to 'a'.
+ - Mapping 'g' to 'd'.
+
+Example 2:
+Input: foo bar
+Output: False
+Explanation:
+ - The strings cannot be made identical as 'o' needs to be mapped to both 'a' and 'r'.
+
+Return True if the following two strings are isomorphic, or False otherwise:
+{s} {t}
+"""
+
+
+@dataclass
+class IsomorphicStringsConfig:
+ """Configuration for Isomorphic Strings dataset generation"""
+
+ max_string_length: int = 10 # Maximum length of the strings
+ p_solvable: float = 0.5 # Probability that the generated question is solvable
+
+ size: int = 500 # Virtual dataset size
+ seed: Optional[int] = None
+
+ def validate(self):
+ """Validate configuration parameters"""
+ assert 2 <= self.max_string_length, "max_string_length must be at least 2"
+ assert 0 <= self.p_solvable <= 1, "p_solvable must be between 0 and 1"
+
+
+class IsomorphicStringsDataset(ProceduralDataset):
+ """Generates Isomorphic Strings exercises with configurable difficulty"""
+
+ def __init__(self, config: IsomorphicStringsConfig):
+ super().__init__(config=config, seed=config.seed, size=config.size)
+ self.letters = {chr(i) for i in range(ord("a"), ord("z") + 1)}
+
+ def _check_isomorphic(self, s: str, t: str) -> bool:
+ """Check if two strings are isomorphic"""
+ if len(s) != len(t):
+ return False
+
+ mapping, inverse_mapping = {}, {} # s -> t, t -> s
+ for i in range(len(s)):
+ if (s[i] in mapping and mapping[s[i]] != t[i]) or (
+ t[i] in inverse_mapping and s[i] != inverse_mapping[t[i]]
+ ):
+ return False
+ mapping[s[i]] = t[i]
+ inverse_mapping[t[i]] = s[i]
+
+ return True
+
+    def _generate_inputs(self, rng: Random, solvable: bool) -> tuple[str, str]:
+        """Generate the two input strings; they are isomorphic exactly when `solvable` is True"""
+        s, t = [], []
+        mapping = {}
+
+        # Generate a valid isomorphic pair first (leave one character for potential conflict)
+        for _ in range(rng.randint(1, self.config.max_string_length - 1)):
+            char_s = rng.choice(sorted(self.letters))  # sorted: set order varies with PYTHONHASHSEED
+            if char_s not in mapping:
+                # Choose a random character that is not already mapped (sorted for reproducibility)
+                char_t = rng.choice(sorted(self.letters - set(mapping.values())))
+                mapping[char_s] = char_t
+            else:
+                # Use the existing mapping
+                char_t = mapping[char_s]
+            s.append(char_s)
+            t.append(char_t)
+
+        if not solvable:
+            # Pair an already used letter with a different target so it maps to two characters
+            letter = rng.choice(list(mapping.keys()))
+            conflict = rng.choice(sorted(self.letters - {mapping[letter]}))
+            insert_idx = rng.randint(0, len(s))
+            s.insert(insert_idx, letter)
+            t.insert(insert_idx, conflict)
+
+        return "".join(s), "".join(t)
+
+ def __getitem__(self, idx: int) -> dict:
+ """Generate a single Isomorphic Strings question"""
+ rng = Random(self.seed + idx)
+
+ solvable = rng.random() < self.config.p_solvable
+ s, t = self._generate_inputs(rng, solvable)
+ answer = self._check_isomorphic(s, t)
+
+ return {
+ "question": QUESTION_TEMPLATE.format(s=s, t=t),
+ "answer": str(answer),
+ "metadata": {"words": [s, t], "solution": answer, "solvable": solvable},
+ }
+
+
+register_dataset("isomorphic_strings", IsomorphicStringsDataset, IsomorphicStringsConfig)
diff --git a/reasoning_gym/games/__init__.py b/reasoning_gym/games/__init__.py
index 8e4e32d6..295f6cdf 100644
--- a/reasoning_gym/games/__init__.py
+++ b/reasoning_gym/games/__init__.py
@@ -11,8 +11,10 @@ from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
from .maze import MazeConfig, MazeDataset
from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
from .n_queens import NQueensDataset
+from .sokoban import SokobanConfig, SokobanDataset
from .sudoku import SudokuConfig, SudokuDataset
from .tower_of_hanoi import HanoiConfig, HanoiDataset
+from .tsumego import TsumegoConfig, TsumegoDataset
__all__ = [
"CountdownConfig",
@@ -21,6 +23,8 @@ __all__ = [
"MiniSudokuDataset",
"SudokuConfig",
"SudokuDataset",
+ "SokobanConfig",
+ "SokobanDataset",
"MazeConfig",
"MazeDataset",
"GameOfLifeConfig",
@@ -28,4 +32,6 @@ __all__ = [
"HanoiConfig",
"HanoiDataset",
"NQueensDataset",
+ "TsumegoConfig",
+ "TsumegoDataset",
]
diff --git a/reasoning_gym/games/contrib/__init__.py b/reasoning_gym/games/contrib/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/reasoning_gym/games/contrib/sokoban/LICENSE b/reasoning_gym/games/contrib/sokoban/LICENSE
new file mode 100644
index 00000000..84d0d484
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Bruno Andrade
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/reasoning_gym/games/contrib/sokoban/README.md b/reasoning_gym/games/contrib/sokoban/README.md
new file mode 100644
index 00000000..44d565ea
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/README.md
@@ -0,0 +1,52 @@
+# 📦 Sokoban Solver and Generator
+
+This folder contains a minified version of Bruno Andrade's Sokoban game with all pygame dependencies stripped.
+
+The original version can be found here: [xbandrade/sokoban-solver-generator](https://github.com/xbandrade/sokoban-solver-generator)
+
+
+This is a Sokoban puzzle generator and solver that uses BFS, A* and Dijkstra search algorithms.
+
+`Sokoban` is a puzzle game in which the player pushes boxes around in a warehouse, trying to get every box to a goal.
+
+
+### ❕Sokoban Puzzle
+The puzzle states are stored in a matrix, and each element of the puzzle is represented by a single character in the matrix.
+```
++ + + + + + +
++ * - @ - X +
++ + - @ - + +
++ X - - - $ +
++ + + + + + +
+```
+`*` - The player
+`%` - The player on a goal
+`@` - A box
+`X` - A goal
+`$` - A box on a goal
+`+` - A wall
+`-` - An empty position
+
+A box on a goal will have its color changed to green on the game window.
+
+
+### ❕Sokoban Generator
+
+The generator will initially create a puzzle with a random board size, then the player and the boxes on goals will be randomly placed on the board.
+During the generation of a puzzle, the player can only pull boxes from their positions, breaking every wall in the way, so the puzzle is guaranteed to have a valid solution.
+
+
+### ❕ Sokoban Solver
+
+The algorithms used to implement the Sokoban puzzle solvers were `Breadth-First Search(BFS)` and `A*`.
+
+The `BFS` solver uses a queue to store the next states of the puzzle it needs to visit. A visited state is stored in a hashset, and BFS won't try to visit the same state twice.
+
+The `A*` algorithm is similar to the BFS algorithm, but it uses a priority queue instead of a queue, and it prioritizes moves that are more likely to solve the problem.
+It does so by setting costs to the puzzle state and the player's movements, punishing the player with high costs for a bad move and rewarding the player with lower costs for a good move.
+The state costs are defined by heuristic functions, and this solver was implemented with two different heuristics: the `Manhattan Distance` function and `Dijkstra` distance function.
+
+All three implementations check for possible deadlocks (states that are impossible to solve) before adding the new state to the queue.
+
+
+More about Sokoban: [Wikipedia Article](https://en.wikipedia.org/wiki/Sokoban)
diff --git a/reasoning_gym/games/contrib/sokoban/__init__.py b/reasoning_gym/games/contrib/sokoban/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat
new file mode 100644
index 00000000..867d112a
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl0.dat
@@ -0,0 +1,10 @@
++ + + + + + +
++ - * - - - +
++ - - - $ - +
++ X - - @ - +
++ - - - - - +
++ $ - + - - +
++ + - - - - +
++ X @ - $ - +
++ + - - - - +
++ + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat
new file mode 100644
index 00000000..9ba48c31
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl1.dat
@@ -0,0 +1,5 @@
++ + + + + + +
++ * - @ - X +
++ + - @ - + +
++ X - - - - +
++ + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat
new file mode 100644
index 00000000..46755810
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl2.dat
@@ -0,0 +1,6 @@
+- - + + + + + +
+- + + - - - * +
++ + - - - + X +
++ X - @ - @ @ +
++ X X @ - - - +
++ + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat
new file mode 100644
index 00000000..9d0bc599
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl3.dat
@@ -0,0 +1,7 @@
+- + + + + + + - - -
+- + X - - X + - - -
++ + - @ @ + + - - -
++ - - - - + + - - -
++ - @ - - * + + + +
++ + - - - - - - X +
+- + + + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat
new file mode 100644
index 00000000..42fbc6eb
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl4.dat
@@ -0,0 +1,7 @@
+- + + + + + + - -
++ + X - @ - + + +
++ - - - - - - - +
++ - @ + + X - @ +
++ - - - @ - + - +
++ + + * - X - X +
+- - + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat
new file mode 100644
index 00000000..3a096d58
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl5.dat
@@ -0,0 +1,7 @@
+- + + + + + + + -
++ + - - + - - + +
++ - @ - - - @ - +
++ - - X * X - - +
++ + @ + + - - + +
++ - - X - - - + -
++ + + + + + + + -
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat
new file mode 100644
index 00000000..32ee5bbc
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl6.dat
@@ -0,0 +1,9 @@
+- - - + + + + + + + +
+- - - + - - - - - - +
+- - + + - - - - @ - +
+- + + - - + + - + + +
++ + - - + - - X - - +
++ - - + X @ @ - - + +
++ * + X - - - - + + -
++ + - - - - - + + - -
++ + + + + + + + - - -
diff --git a/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat b/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat
new file mode 100644
index 00000000..9c2fe302
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/levels/lvl7.dat
@@ -0,0 +1,6 @@
++ + + + + + + +
++ - - @ - X * +
++ - @ - - + X +
++ X X @ - @ @ +
++ X X @ - - - +
++ + + + + + + +
diff --git a/reasoning_gym/games/contrib/sokoban/src/__init__.py b/reasoning_gym/games/contrib/sokoban/src/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/reasoning_gym/games/contrib/sokoban/src/astar.py b/reasoning_gym/games/contrib/sokoban/src/astar.py
new file mode 100644
index 00000000..25d1e63d
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/astar.py
@@ -0,0 +1,81 @@
+from collections import defaultdict
+from heapq import heappop, heappush
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.utils import (
+ can_move,
+ dijkstra_sum,
+ get_state,
+ is_deadlock,
+ is_solved,
+ manhattan_sum,
+)
+
+
+def astar(matrix, player_pos, debug=False, heuristic="manhattan"):
+ # print(f'A* - {heuristic.title()} Heuristic')
+ heur = "[A*]" if heuristic == "manhattan" else "[Dijkstra]"
+ shape = matrix.shape
+ initial_state = get_state(matrix)
+ initial_cost = curr_depth = 0
+ if heuristic == "manhattan":
+ curr_cost = manhattan_sum(initial_state, player_pos, shape)
+ else:
+ distances = defaultdict(lambda: [])
+ curr_cost = dijkstra_sum(initial_state, player_pos, shape, distances)
+ seen = {None}
+ heap = []
+ heappush(heap, (initial_cost, curr_cost, initial_state, player_pos, curr_depth, ""))
+ moves = [(1, 0), (-1, 0), (0, -1), (0, 1)]
+ direction = {
+ (1, 0): "D",
+ (-1, 0): "U",
+ (0, -1): "L",
+ (0, 1): "R",
+ }
+ while heap:
+ _, curr_cost, state, pos, depth, path = heappop(heap)
+ seen.add(state)
+ for move in moves:
+ new_state, move_cost = can_move(state, shape, pos, move)
+ deadlock = is_deadlock(new_state, shape)
+ if new_state in seen or deadlock:
+ continue
+ new_pos = pos[0] + move[0], pos[1] + move[1]
+ if heuristic == "manhattan":
+ new_cost = manhattan_sum(new_state, new_pos, shape)
+ else:
+ new_cost = dijkstra_sum(new_state, new_pos, shape, distances)
+ if new_cost == float("inf"):
+ continue
+ heappush(
+ heap,
+ (
+ move_cost + curr_cost,
+ new_cost,
+ new_state,
+ new_pos,
+ depth + 1,
+ path + direction[move],
+ ),
+ )
+ if is_solved(new_state):
+ # print(f'{heur} Solution found!\n\n{path + direction[move]}\nDepth {depth + 1}\n')
+ if debug:
+ print(f"{heur} Solution Found!\n{path + direction[move]}", 20)
+ return (path + direction[move], depth + 1)
+ if debug:
+ print(f"{heur} Solution Depth: {depth + 1}\n{path + direction[move]}", 20)
+ print(f"{heur} Solution not found!\n")
+ if debug:
+ print(f"{heur} Solution Not Found!\nDepth {depth + 1}", 20)
+
+ return (None, -1 if not heap else depth + 1)
+
+
+def solve_astar(puzzle, visualizer=False, heuristic="manhattan"):
+ matrix = puzzle
+ where = np.where((matrix == "*") | (matrix == "%"))
+ player_pos = where[0][0], where[1][0]
+ return astar(matrix, player_pos, debug=visualizer, heuristic=heuristic)
diff --git a/reasoning_gym/games/contrib/sokoban/src/bfs.py b/reasoning_gym/games/contrib/sokoban/src/bfs.py
new file mode 100644
index 00000000..d6a376c9
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/bfs.py
@@ -0,0 +1,66 @@
+import time
+from collections import deque
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.utils import can_move, get_state, is_deadlock, is_solved, print_state
+
+
+def bfs(matrix, player_pos, debug=False):
+ print("Breadth-First Search")
+ initial_state = get_state(matrix)
+ shape = matrix.shape
+ print_state(initial_state, shape)
+ seen = {None}
+ q = deque([(initial_state, player_pos, 0, "")])
+ moves = [(1, 0), (-1, 0), (0, -1), (0, 1)]
+ curr_depth = 0
+ direction = {
+ (1, 0): "D",
+ (-1, 0): "U",
+ (0, -1): "L",
+ (0, 1): "R",
+ }
+ while q:
+ state, pos, depth, path = q.popleft()
+ # if depth != curr_depth:
+ # print(f'Depth: {depth}')
+ # curr_depth = depth
+ seen.add(state)
+ for move in moves:
+ new_state, _ = can_move(state, shape, pos, move)
+ deadlock = is_deadlock(new_state, shape)
+ if new_state in seen or deadlock:
+ continue
+ q.append(
+ (
+ new_state,
+ (pos[0] + move[0], pos[1] + move[1]),
+ depth + 1,
+ path + direction[move],
+ )
+ )
+ if is_solved(new_state):
+ print(f"[BFS] Solution found!\n\n{path + direction[move]}\nDepth {depth + 1}\n")
+ if debug:
+ print(f"[BFS] Solution Found!\n{path + direction[move]}", 20)
+ return (path + direction[move], depth + 1)
+ if debug:
+ print(f"[BFS] Solution Depth: {depth + 1}\n{path + direction[move]}", 20)
+ print(f"[BFS] Solution not found!\n")
+ if debug:
+ print(f"[BFS] Solution Not Found!\nDepth {depth + 1}", 20)
+ return (None, -1 if not q else depth + 1)
+
+
+def solve_bfs(puzzle, visualizer=False):
+ matrix = puzzle
+ where = np.where((matrix == "*") | (matrix == "%"))
+ player_pos = where[0][0], where[1][0]
+ return bfs(matrix, player_pos, debug=visualizer)
+
+
+if __name__ == "__main__":
+ start = time.time()
+ root = solve_bfs(np.loadtxt("levels/lvl7.dat", dtype=" str:
+ return self.char
+
+
+class Game:
+ def __init__(self, width=19, height=10, level=None, path=None):
+ self.level = level
+ self.width = width
+ self.height = height
+ self.puzzle = np.empty((height, width), dtype=PuzzleElement)
+
+ self.player = None
+ self.puzzle_size = None
+ self.pad_x = 0
+ self.pad_y = 0
+ self.path = path or f"levels/lvl{level}.dat"
+
+ if path:
+ if type(self) == Game:
+ self.load_puzzle()
+
+ def get_matrix(self):
+ slice_x = slice(self.pad_x, self.pad_x + self.puzzle_size[1])
+ slice_y = slice(self.pad_y, self.pad_y + self.puzzle_size[0])
+ sliced = self.puzzle[slice_y, slice_x]
+ matrix = np.empty((self.puzzle_size), dtype=" 0 else 0)
+ pad_x = (self.width - self.puzzle_size[1] - 2) // 2 # -2 matches original file-based logic
+ pad_y = (self.height - self.puzzle_size[0]) // 2
+ self.pad_x, self.pad_y = pad_x, pad_y
+
+ # Populate puzzle elements
+ for i, row in enumerate(data):
+ for j, c in enumerate(row):
+ new_elem = PuzzleElement(c)
+ self.puzzle[i + pad_y, j + pad_x] = new_elem
+
+ # Create game objects based on characters
+ if c == "+": # Wall
+ new_elem.obj = Obstacle(x=j + pad_x, y=i + pad_y)
+ elif c == "@": # Box
+ new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self)
+ elif c == "*": # Player
+ new_elem.obj = Player(x=j + pad_x, y=i + pad_y, game=self)
+ self.player = new_elem.obj
+ elif c == "X": # Goal
+ new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
+ elif c == "$": # Box on goal
+ new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
+ new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self)
+ elif c == "%": # Player on goal
+ new_elem.obj = Player(x=j + pad_x, y=i + pad_y, game=self)
+ new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
+ self.player = new_elem.obj
+ elif c not in " -": # Validation
+ raise ValueError(f"Invalid character in puzzle: {c}")
+
+
+class ReverseGame(Game):
+ def __init__(self, rng: Random, width=19, height=10, level=None):
+ super().__init__(width, height, level)
+ self.rng = rng
+ self.pad_x = 0
+ self.pad_y = 0
+
+ def load_puzzle(self, puzzle):
+ self.puzzle_size = (len(puzzle), len(puzzle[0]) if len(puzzle) > 0 else 0)
+ pad_x = (self.width - len(puzzle[0]) - 2) // 2
+ pad_y = (self.height - len(puzzle)) // 2
+ self.pad_x, self.pad_y = pad_x, pad_y
+ for i, row in enumerate(puzzle):
+ for j, c in enumerate(row):
+ new_elem = PuzzleElement(c)
+ self.puzzle[i + pad_y, j + pad_x] = new_elem
+ if c == "+": # wall
+ new_elem.obj = Obstacle(x=j + pad_x, y=i + pad_y)
+ elif c == "@": # box
+ new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self)
+ elif c == "*": # player
+ new_elem.obj = ReversePlayer(rng=self.rng, x=j + pad_x, y=i + pad_y, game=self)
+ self.player = new_elem.obj
+ elif c == "X": # goal
+ new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
+ elif c == "$": # box on goal
+ new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
+ new_elem.obj = Box(x=j + pad_x, y=i + pad_y, game=self)
+ elif c == "%": # player on goal
+ new_elem.obj = ReversePlayer(rng=self.rng, x=j + pad_x, y=i + pad_y, game=self)
+ new_elem.ground = Goal(x=j + pad_x, y=i + pad_y)
+ self.player = new_elem.obj
diff --git a/reasoning_gym/games/contrib/sokoban/src/generator.py b/reasoning_gym/games/contrib/sokoban/src/generator.py
new file mode 100644
index 00000000..da4c954f
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/generator.py
@@ -0,0 +1,107 @@
+from random import Random
+
+import numpy as np
+
+from reasoning_gym.games.contrib.sokoban.src.astar import solve_astar
+from reasoning_gym.games.contrib.sokoban.src.game import Game, ReverseGame
+
+
def num_boxes(puzzle_area, min_boxes, max_boxes, min_w, min_h, max_w, max_h):
    """Linearly interpolate the number of boxes for a puzzle of the given area.

    The smallest allowed area (``min_w * min_h``) maps to ``min_boxes`` and the
    largest (``max_w * max_h``) to ``max_boxes``; the result is truncated to int.

    When the smallest and largest areas coincide there is nothing to
    interpolate over, so ``min_boxes`` is returned instead of dividing by zero.
    """
    area_span = max_w * max_h - min_w * min_h
    if area_span == 0:
        return int(min_boxes)
    m = (max_boxes - min_boxes) / area_span
    # Same operand order as before so floating-point results are unchanged.
    b = min_boxes - m * min_w * min_h
    return int(m * puzzle_area + b)
+
+
def random_valid(rng: Random, width: int = 10, height: int = 10):
    """Draw a random pair of coordinates that avoids the outermost ring.

    The first value is drawn from ``[1, width - 2]`` and the second from
    ``[1, height - 2]``, in that order, using ``rng``.
    """
    first = rng.randrange(1, width - 1)
    second = rng.randrange(1, height - 1)
    return first, second
+
+
+def generate(
+ rng: Random,
+ debug: bool = False,
+ path: str = None,
+ min_w: int = 6,
+ min_h: int = 6,
+ max_w: int = 15,
+ max_h: int = 10,
+ min_boxes: int = 4,
+ max_boxes: int = 10,
+) -> tuple[str, str, dict]:
+ """
+ Generates a level with the given configuration parameters.
+
+ Parameters:
+ rng: Random number generator for reproducibility.
+ debug: Whether to print the generated puzzle and replay the solution move by move.
+ path: Path to save the level file (default 'levels/lvl0.dat').
+ min_w: Minimum width of the puzzle.
+ min_h: Minimum height of the puzzle.
+ max_w: Maximum width of the puzzle.
+ max_h: Maximum height of the puzzle.
+ min_boxes: Minimum number of boxes.
+ max_boxes: Maximum number of boxes.
+ Returns:
+ puzzle_string, solution, difficulty (a dict with the keys "size" and "num_steps")
+ """
+ path = path or "levels/lvl0.dat"
+ while True:
+ width = rng.randint(min_w, max_w)
+ height = rng.randint(min_h, max_h)
+ puzzle = np.full((height, width), "+", dtype=" 0:
+ reverse_game.player.update(puzzle_size)
+ if player.states[player.curr_state] >= 20:
+ break
+ counter -= 1
+ slice_x = slice(reverse_game.pad_x, reverse_game.pad_x + width)
+ slice_y = slice(reverse_game.pad_y, reverse_game.pad_y + height)
+ matrix = reverse_game.puzzle[slice_y, slice_x]
+ # Optionally print the puzzle:
+ if debug:
+ player.print_puzzle(matrix)
+
+ out_of_place_boxes = np.sum([str(x) == "@" for x in matrix.flatten()])
+ if out_of_place_boxes >= boxes // 2:
+ # Optionally save the puzzle to a file:
+ # np.savetxt(path, matrix, fmt='%s')
+ puzzle_str = player.puzzle_to_string(matrix)
+
+ grid_list = [list(line) for line in puzzle_str.replace(" ", "").strip().split("\n")]
+ grid_array = np.array(grid_list)
+ solution, _ = solve_astar(grid_array)
+
+ if debug:
+ print(f"solution={solution}")
+ game = Game()
+ game.load_puzzle_matrix(grid_array)
+
+ for step, move in enumerate(solution):
+ print(f"move #{step}: {move}")
+ game.player.update(key=move)
+ game.print_puzzle()
+
+ difficulty = {"size": puzzle_size, "num_steps": len(solution)}
+ return puzzle_str, solution, difficulty
+ else:
+ if debug:
+ print(f"Not enough boxes out of place, retrying generation... [{out_of_place_boxes}/{boxes}]")
+
+
# Manual smoke run: generate one level with an unseeded RNG and debug printing enabled.
if __name__ == "__main__":
    generate(rng=Random(), debug=True)
diff --git a/reasoning_gym/games/contrib/sokoban/src/player.py b/reasoning_gym/games/contrib/sokoban/src/player.py
new file mode 100644
index 00000000..1299ea7c
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/player.py
@@ -0,0 +1,118 @@
+from collections import defaultdict
+from random import Random
+
+from reasoning_gym.games.contrib.sokoban.src.box import Box, Obstacle
+
+
class Player:
    """Sokoban player that walks on the board and pushes boxes ahead of it."""

    def __init__(self, x, y, game):
        self.x = x
        self.y = y
        self.game = game

    def update(self, key: str = None) -> int:
        """Try to move one cell in the direction named by ``key``.

        Args:
            key: "R", "L", "U" or "D"; anything else (including None) is ignored.

        Returns:
            1 if the player moved, 0 otherwise (unknown key, obstacle ahead, or a
            box ahead whose ``can_move`` refused the push).
        """
        step = {"R": (1, 0), "L": (-1, 0), "U": (0, -1), "D": (0, 1)}.get(key) if key else None
        if not step:
            return 0

        origin = self.y, self.x
        destination = self.y + step[1], self.x + step[0]
        dest_cell = self.game.puzzle[destination]

        # Walls stop the player outright.
        if dest_cell and dest_cell.obj and isinstance(dest_cell.obj, Obstacle):
            return 0
        # A box ahead must accept the push; can_move is only consulted for boxes.
        if isinstance(dest_cell.obj, Box) and not dest_cell.obj.can_move(step):
            return 0

        origin_cell = self.game.puzzle[origin]
        self.y, self.x = destination
        # The vacated cell shows its ground again; the entered cell shows the player.
        origin_cell.char = "X" if origin_cell.ground else "-"
        origin_cell.obj = None
        dest_cell.char = "%" if dest_cell.ground else "*"
        dest_cell.obj = self
        return 1
+
+
class ReversePlayer(Player):
    """A player that can only pull boxes.

    Each ``update`` performs one random step, carving through walls and pulling
    along any box that sits directly behind the player. The instance also counts
    how often every board state has been seen (``states``).
    """

    # What a cell's character becomes when the player leaves it ("*", "%"),
    # steps onto it ("+", "-", "X"), or a box is pulled off it ("@", "$").
    _QUICK_CHARS = {
        "*": "-",
        "%": "X",
        "+": "*",
        "-": "*",
        "X": "%",
        "@": "-",
        "$": "X",
    }
    _MOVES = [(1, 0), (-1, 0), (0, -1), (0, 1)]

    def __init__(self, rng: Random, x, y, game=None, puzzle=None):
        # Player.__init__ already stores game, x and y.
        super().__init__(x=x, y=y, game=game)
        self.rng = rng
        self.puzzle = puzzle
        self.curr_state = ""  # state string captured at the start of the last update()
        self.states = defaultdict(int)  # state string -> number of update() calls that started in it
        self.prev_move = (0, 0)  # move that is down-weighted on the next update()

    def print_puzzle(self, matrix=None):
        """Print ``puzzle_to_string(matrix)``."""
        print(self.puzzle_to_string(matrix=matrix))

    def puzzle_to_string(self, matrix=None):
        """Render ``matrix`` (default: the game's board) as text.

        Every cell is followed by one space, falsy cells are shown as "F", each
        row ends with an extra space and a newline, and the whole text ends with
        one additional newline.
        """
        matrix = matrix if matrix is not None else self.game.puzzle
        height, width = len(matrix), len(matrix[0])
        lines = []
        for h in range(height):
            cells = (str(matrix[h, w]) if matrix[h, w] else "F" for w in range(width))
            lines.append("".join(cell + " " for cell in cells) + " " + "\n")
        return "".join(lines) + "\n"

    def get_state(self):
        """Return the board as one string: ``str()`` of every truthy cell, row by row."""
        puzzle = self.game.puzzle
        height, width = len(puzzle), len(puzzle[0])
        return "".join(
            str(puzzle[row, col]) for row in range(height) for col in range(width) if puzzle[row, col]
        )

    def update(self, puzzle_size):
        """Attempt one random reverse move.

        Args:
            puzzle_size: ``(height, width)`` of the puzzle area inside the board.

        One of the four unit moves is drawn with ``rng.choices``; the move equal
        to ``prev_move`` gets weight 0.1, the others weight 1. The move is
        rejected when it would reach the puzzle's border or a cell holding a box;
        ``prev_move`` then becomes the rejected move. Otherwise the player steps
        to the target, ``prev_move`` becomes the opposite move, and a box on the
        cell directly behind the player is pulled along via ``reverse_move``.
        """
        height, width = puzzle_size
        quick_chars = self._QUICK_CHARS
        candidates = self._MOVES
        # k=1: exactly one move per update; the call signature is kept so the RNG stream is unchanged.
        move = self.rng.choices(
            candidates, weights=[0.1 if m == self.prev_move else 1 for m in candidates], k=1
        )[0]
        self.curr_state = self.get_state()
        self.states[self.curr_state] += 1

        board = self.game.puzzle
        curr_pos = self.y, self.x
        target = self.y + move[0], self.x + move[1]
        reverse_target = self.y - move[0], self.x - move[1]
        # The board lookup comes last so it is only evaluated for in-range targets.
        if (
            target[1] == self.game.pad_x
            or target[0] == self.game.pad_y
            or target[1] >= self.game.pad_x + width - 1
            or target[0] >= self.game.pad_y + height - 1
            or (board[target] and board[target].char in "@$")
        ):
            self.prev_move = move
            return

        self.prev_move = -move[0], -move[1]
        board[curr_pos].char = quick_chars[board[curr_pos].char]
        board[curr_pos].obj = None
        board[target].char = quick_chars[board[target].char]
        board[target].obj = self
        if (c := board[reverse_target].char) in "@$":
            board[reverse_target].char = quick_chars[c]
            board[reverse_target].obj.reverse_move(move)

        self.y, self.x = target
diff --git a/reasoning_gym/games/contrib/sokoban/src/utils.py b/reasoning_gym/games/contrib/sokoban/src/utils.py
new file mode 100644
index 00000000..106fb8d1
--- /dev/null
+++ b/reasoning_gym/games/contrib/sokoban/src/utils.py
@@ -0,0 +1,170 @@
+from heapq import heappop, heappush
+
+import numpy as np
+
+
def print_state(state, shape):
    """Print ``state`` as a ``shape``-sized grid of characters; do nothing if it is empty."""
    if state:
        rows, cols = shape
        grid = np.array(list(state)).reshape(rows, cols)
        print(grid)
+
+
def find_boxes_and_goals(state, shape):
    """Locate boxes and goals in a flat state string.

    Returns:
        Three lists of ``(row, col)`` tuples: boxes not on a goal ("@"), goals
        not covered by a box ("X" and "%"), and boxes standing on a goal ("$").
    """
    _, width = shape
    boxes, goals, boxes_on_goal = [], [], []
    bucket_for = {"@": boxes, "X": goals, "%": goals, "$": boxes_on_goal}
    for index, symbol in enumerate(state):
        bucket = bucket_for.get(symbol)
        if bucket is not None:
            bucket.append(divmod(index, width))
    return boxes, goals, boxes_on_goal
+
+
def get_state(matrix):
    """Flatten a character matrix into one string.

    The raw bytes of the array are decoded as UTF-8 and every NUL character is
    dropped.
    NOTE(review): presumably ``matrix`` is a fixed-width numpy string array of
    ASCII characters, whose per-character padding shows up as NUL bytes — confirm.
    """
    raw = matrix.tobytes()
    text = raw.decode("utf-8")
    return text.replace("\x00", "")
+
+
def is_solved(state):
    """Return True when no box outside a goal ("@") is left in ``state``."""
    return state.count("@") == 0
+
+
def manhattan_sum(state, player_pos, shape):
    """Heuristic cost of ``state`` based on Manhattan distances.

    Each box that is not on a goal costs ``height * width`` plus its distance to
    the nearest uncovered goal; on top of that comes the player's distance to
    the nearest such box. A state without such boxes costs 0.
    """
    height, width = shape
    player_x, player_y = player_pos
    boxes, goals, _ = find_boxes_and_goals(state, shape)
    if not boxes:
        return 0

    def nearest(origin, points):
        ox, oy = origin
        return min(abs(ox - px) + abs(oy - py) for px, py in points)

    boxes_cost = len(boxes) * height * width
    for box in boxes:
        boxes_cost += nearest(box, goals)
    player_cost = nearest((player_x, player_y), boxes)
    return boxes_cost + player_cost
+
+
def dijkstra(state, shape, box_pos=None, player_pos=None):
    """Compute shortest step counts from one cell to every cell inside the border.

    Args:
        state: Flat board string, row-major.
        shape: ``(height, width)`` of the board.
        box_pos: Start cell when measuring for a box; walls and other boxes
            ("+", "@", "$") then block the way.
        player_pos: Start cell used when ``box_pos`` is not given; only walls
            ("+") block the way.

    Returns:
        A ``height x width`` float array of distances; unreachable cells and the
        outermost ring hold ``inf``.
    """
    height, width = shape
    source = box_pos or player_pos
    blocking = "+" if player_pos else "+@$"

    best = np.full((height, width), float("inf"))
    best[source] = 0
    frontier = [(0, source)]
    while frontier:
        dist, (r, c) = heappop(frontier)
        if dist > best[r, c]:
            continue  # stale queue entry
        step = dist + 1
        for dr, dc in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            nr, nc = r + dr, c + dc
            if not (1 <= nr < height - 1 and 1 <= nc < width - 1):
                continue
            if state[nr * width + nc] in blocking:
                continue
            if step < best[nr, nc]:
                best[nr, nc] = step
                heappush(frontier, (step, (nr, nc)))
    return best
+
+
def dijkstra_sum(state, player_pos, shape, distances):
    """Heuristic cost of ``state`` based on walking distances.

    Fills ``distances`` (mutated in place) with one distance map per box, on or
    off a goal, and one for the player. Each box that is not on a goal costs
    ``height * width`` plus its distance to the nearest uncovered goal; the
    player's distance to the nearest such box is added on top. A state without
    such boxes costs 0.
    """
    height, width = shape
    boxes, goals, boxes_on_goal = find_boxes_and_goals(state, shape)

    for box in boxes + boxes_on_goal:
        distances[box] = dijkstra(state, shape, box)
    distances[player_pos] = dijkstra(state, shape, player_pos=player_pos)

    if not boxes:
        return 0
    goal_distance = sum(min(distances[box][goal] for goal in goals) for box in boxes)
    player_distance = min(distances[player_pos][box] for box in boxes)
    return len(boxes) * height * width + goal_distance + player_distance
+
+
def _boxes_outnumber_goals(state, indices):
    """Return True if the cells at ``indices`` hold more "@" than "X"/"%"."""
    box = goal = 0
    for i in indices:
        if state[i] == "@":
            box += 1
        elif state[i] in "X%":
            goal += 1
    return box > goal


def is_deadlock(state, shape):
    """Detect a few simple unsolvable patterns in a flat board string.

    Args:
        state: Flat board string, row-major.
        shape: ``(height, width)`` of the board.

    Returns:
        True if any box that is not on a goal ("@") sits in a wall corner or is
        part of a 2x2 block made only of boxes and walls, or if one of the four
        lines of cells next to the outer border holds more such boxes than
        uncovered goals. False otherwise, and also when ``state`` is empty or
        its length does not match ``shape``.
    """
    height, width = shape
    if not state or len(state) != height * width:
        return False

    # Flat indices of boxes that are not on a goal.
    boxes = [pos for pos, char in enumerate(state) if char == "@"]

    for box in boxes:  # corner deadlock
        if (
            (state[box - 1] == "+" and state[box - width] == "+")
            or (state[box + 1] == "+" and state[box + width] == "+")
            or (state[box + 1] == "+" and state[box - width] == "+")
            or (state[box - 1] == "+" and state[box + width] == "+")
        ):
            return True

    # Offsets of the four 2x2 squares that contain the box itself.
    double_box_positions = [
        (0, -1, -width, -width - 1),
        (0, 1, -width, -width + 1),
        (0, -1, width - 1, width),
        (0, 1, width + 1, width),
    ]
    frozen_squares = ({"@", "+"}, {"@"}, {"@", "$"}, {"@", "$", "+"})
    for box in boxes:  # double box deadlock
        for offsets in double_box_positions:
            if {state[box + offset] for offset in offsets} in frozen_squares:
                return True

    # Too many boxes deadlock: first and last inner row, first and last inner column.
    border_lines = (
        range(width + 1, 2 * width - 1),
        range(width * (height - 2) + 1, width * (height - 2) + width - 1),
        range(width + 1, width * (height - 1) + 1, width),
        range(2 * width - 2, width * height - 2, width),
    )
    return any(_boxes_outnumber_goals(state, line) for line in border_lines)
+
+
def can_move(state, shape, player_pos, move):
    """Apply one player move to a flat board string.

    Args:
        state: Flat board string, row-major.
        shape: ``(height, width)``; only the width is used.
        player_pos: ``(row, col)`` of the player.
        move: ``(d_row, d_col)`` step.

    Returns:
        ``(new_state, cost)``. ``new_state`` is None when a wall is ahead or a
        box ahead cannot be pushed. The cost is 3 for a plain step, 2 for
        pushing a box onto a non-goal cell and 0 for pushing it onto a goal.
        Any other situation returns the state unchanged with cost 0.
    """
    cells = list(state)
    row, col = player_pos
    _, width = shape
    here = row * width + col
    ahead = (row + move[0]) * width + (col + move[1])
    beyond = (row + move[0] * 2) * width + (col + move[1] * 2)
    cost = 0

    front = state[ahead]
    if front == "+":
        return None, cost

    # What the player's current cell shows once the player has left it.
    vacated = "-" if cells[here] == "*" else "X"
    if front in "-X":
        cells[here] = vacated
        cells[ahead] = "*" if front == "-" else "%"
        cost = 3
    elif front in "@$":
        far = state[beyond]
        if far in "+@$":
            return None, cost
        if far in "-X":
            cells[beyond] = "@" if far == "-" else "$"
            cells[ahead] = "*" if front == "@" else "%"
            cells[here] = vacated
            cost = 0 if cells[beyond] == "$" else 2
    return "".join(cells), cost
diff --git a/reasoning_gym/games/sokoban.py b/reasoning_gym/games/sokoban.py
new file mode 100644
index 00000000..f96d87ea
--- /dev/null
+++ b/reasoning_gym/games/sokoban.py
@@ -0,0 +1,117 @@
+from dataclasses import dataclass
+from random import Random
+from typing import Dict, Optional
+
+import numpy as np
+
+from ..factory import ProceduralDataset, register_dataset
+
+
@dataclass
class SokobanConfig:
    """Settings for generating Sokoban puzzles."""

    seed: Optional[int] = None
    size: int = 500
    # Bounds for the puzzle dimensions.
    min_w: int = 6  # smallest puzzle width
    min_h: int = 6  # smallest puzzle height
    max_w: int = 10  # largest puzzle width
    max_h: int = 10  # largest puzzle height
    # Bounds for the number of boxes.
    min_boxes: int = 6  # fewest boxes
    max_boxes: int = 10  # most boxes

    def validate(self):
        """Assert that every minimum does not exceed its matching maximum."""
        ordered_pairs = (
            (self.min_w, self.max_w, "min_w must be lte max_w"),
            (self.min_h, self.max_h, "min_h must be lte max_h"),
            (self.min_boxes, self.max_boxes, "min_boxes must be lte max_boxes"),
        )
        for lower, upper, message in ordered_pairs:
            assert lower <= upper, message
+
+
class SokobanDataset(ProceduralDataset):
    """Generates Sokoban games with configurable parameters"""

    def __init__(self, config: SokobanConfig):
        # NOTE(review): not referenced anywhere in this class; kept so the attribute stays available.
        self._prompt_templates = [
            "What will this Sokoban board look like after {simulation_steps} steps of simulation?\n\n{board}"
        ]

        super().__init__(config=config, seed=config.seed, size=config.size)

        # lazy loading of sokoban imports
        from .contrib.sokoban.src.game import Game
        from .contrib.sokoban.src.generator import generate
        from .contrib.sokoban.src.utils import is_solved

        self._Game = Game
        self._generate = generate
        self._is_solved = is_solved

    def __getitem__(self, idx: int) -> dict:
        """Generate a single Sokoban task

        The puzzle is generated from ``Random(self.seed + idx)`` using the size and
        box limits of the dataset's config.

        Returns:
            dict with keys:
                - question: str, the task description
                - answer: str, a solution string
                - metadata: dict with generation parameters
        """

        # Make the Sokoban!
        rng = Random(self.seed + idx)
        cfg = self.config
        # Forward the configured limits; otherwise the generator's own defaults would silently apply.
        gamestr, solution, difficulty = self._generate(
            rng=rng,
            min_w=cfg.min_w,
            min_h=cfg.min_h,
            max_w=cfg.max_w,
            max_h=cfg.max_h,
            min_boxes=cfg.min_boxes,
            max_boxes=cfg.max_boxes,
        )

        return {
            "question": """You are going to solve a 'sokoban' puzzle.

* - The player
% - The player on a goal
@ - A box
X - A goal
$ - A box on a goal
+ - A wall
- - An empty position

Your solution must be a string of characters, ex: LDURRUDL.

Here is your puzzle:
"""
            + gamestr,
            "answer": solution,
            "metadata": {"gamestr": gamestr, "difficulty": difficulty},
        }

    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
        """Determine if the solution provided solves the Sokoban task.

        The answer's moves are replayed on the puzzle stored in the entry's metadata.

        Args:
            answer (Optional[str]): The user's answer.
            entry (Dict[str, any]): The original dataset entry containing the correct answer.

        Returns:
            float: 1.0 if the replay ends in a solved state, 0.0 if no answer was
            given, 0.01 if the replay raised an exception, 0.1 otherwise.
        """

        if answer is None:
            return 0.0

        try:
            grid_list = [list(line) for line in entry["metadata"]["gamestr"].replace(" ", "").strip().split("\n")]
            matrix = np.array(grid_list)

            game = self._Game()
            game.load_puzzle_matrix(matrix)

            for move in answer:
                game.player.update(key=move)

            if self._is_solved(game.get_curr_state()):
                return 1.0
        except Exception:
            # Deliberate best effort: any failure while replaying counts as a malformed answer.
            return 0.01

        return 0.1
+
+
+register_dataset("sokoban", SokobanDataset, SokobanConfig)
diff --git a/reasoning_gym/games/tsumego.py b/reasoning_gym/games/tsumego.py
new file mode 100644
index 00000000..be1e4fd6
--- /dev/null
+++ b/reasoning_gym/games/tsumego.py
@@ -0,0 +1,305 @@
+"""Go problem (tsumego) generator"""
+
+"""
+This module generates one-move Tsumego puzzles, which are Go problems focused on tactical capture scenarios.
+
+The puzzles generated here have the following characteristics:
+- They are created on a board of configurable size (with a minimum and maximum board size).
+- A number of stones are randomly placed on the board, subject to a maximum stone limit.
+- A specific capture problem is then constructed by arranging white stones in a randomly chosen formation (plus, L or T shape).
+- Extra liberties surrounding this white group are filled with black stones, except for one key liberty.
+ This forces a situation where a single move by Black (at the remaining liberty) results in a capture.
+- Puzzle generation is deterministic given a seed, which ensures reproducibility.
+
+These puzzles are intended to provide focused practice on reading and executing capturing moves in Go.
+
+TODO: Generate multi-step Tsumego problems.
+"""
+
+import re
+from dataclasses import dataclass
+from random import Random
+from typing import Any, Dict, List, Optional, Set, Tuple
+
+from ..factory import ProceduralDataset, register_dataset
+
# (row, col) offsets of the four orthogonally adjacent board points
DIRECTIONS = [(-1, 0), (1, 0), (0, -1), (0, 1)]
+
+
@dataclass
class TsumegoConfig:
    """Settings for generating Tsumego problems."""

    min_board_size: int = 9
    max_board_size: int = 13
    max_stones: int = 15
    size: int = 100
    seed: Optional[int] = None

    def __post_init__(self):
        """Reject out-of-range settings with a ValueError; checks run in a fixed order."""

        def violations():
            # Lazily yields the message of each failed check, first check first.
            if self.min_board_size < 5:
                yield "min_board_size must be at least 5"
            if self.max_board_size > 19:
                yield "max_board_size must be at most 19"
            if self.min_board_size > self.max_board_size:
                yield "min_board_size must be less than or equal to max_board_size"
            if self.max_stones < 5:
                yield "max_stones must be at least 5"

        for message in violations():
            raise ValueError(message)
+
+
class TsumegoDataset(ProceduralDataset):
    """Generates Tsumego problems with configurable parameters.

    A board is a square list of rows whose cells are "." (empty), "X" (Black)
    or "O" (White). Moves are written as a column letter followed by a row
    number counted from the bottom of the board, e.g. "C4".
    """

    def __init__(self, config: TsumegoConfig):
        self._prompt_templates = [
            "Tsumego time. Black to play and capture some stones.\nFind the key move.",
            "I have a Go problem for you. Black moves next - can you capture some of the white stones?",
            "Here's a Go challenge. Playing as Black, how can you capture as many white stones as possible?",
        ]
        # Point that may not be played next because of ko; maintained by _make_move.
        self._ko_point = None
        super().__init__(config=config, seed=config.seed, size=config.size)

    def _copy_board(self, board: List[List[str]]) -> List[List[str]]:
        """Return a deep copy of the board."""
        return [row[:] for row in board]

    def _get_liberties(self, board: List[List[str]], row: int, col: int) -> Set[Tuple[int, int]]:
        """Get empty adjacent points (liberties) for a stone"""
        size = len(board)
        liberties = set()
        for dr, dc in DIRECTIONS:
            r, c = row + dr, col + dc
            if 0 <= r < size and 0 <= c < size and board[r][c] == ".":
                liberties.add((r, c))
        return liberties

    def _get_group(self, board: List[List[str]], row: int, col: int) -> Set[Tuple[int, int]]:
        """Get all stones in the same group (connected stones of same color).

        Returns an empty set when the point itself is empty.
        """
        size = len(board)
        color = board[row][col]
        if color == ".":
            return set()

        group = {(row, col)}
        # Traversal order does not affect the resulting set, so pop from the end (O(1)).
        pending = [(row, col)]
        while pending:
            r, c = pending.pop()
            for dr, dc in DIRECTIONS:
                nr, nc = r + dr, c + dc
                if 0 <= nr < size and 0 <= nc < size and board[nr][nc] == color and (nr, nc) not in group:
                    group.add((nr, nc))
                    pending.append((nr, nc))
        return group

    def _count_liberties(self, board: List[List[str]], group: Set[Tuple[int, int]]) -> int:
        """Count the distinct liberties of a group of stones"""
        liberties = set()
        for row, col in group:
            liberties.update(self._get_liberties(board, row, col))
        return len(liberties)

    def _would_capture(self, board: List[List[str]], row: int, col: int, color: str) -> bool:
        """Check if a move would capture any opponent stones (the board is not modified)"""
        size = len(board)
        opponent = "O" if color == "X" else "X"

        # Make a copy of the board and place the stone
        board_copy = self._copy_board(board)
        board_copy[row][col] = color

        checked = set()
        for dr, dc in DIRECTIONS:
            r, c = row + dr, col + dc
            if 0 <= r < size and 0 <= c < size and board_copy[r][c] == opponent and (r, c) not in checked:
                group = self._get_group(board_copy, r, c)
                checked.update(group)
                if self._count_liberties(board_copy, group) == 0:
                    return True
        return False

    def _is_valid_move(self, board: List[List[str]], row: int, col: int, color: str) -> bool:
        """Check if a move is legal: on the board, on an empty point, not the ko
        point, and not suicide unless it captures."""
        size = len(board)
        if not (0 <= row < size and 0 <= col < size):
            return False
        if board[row][col] != ".":
            return False
        if (row, col) == self._ko_point:
            return False

        # If the move captures opponent stones, it's valid
        if self._would_capture(board, row, col, color):
            return True

        board_copy = self._copy_board(board)
        board_copy[row][col] = color
        group = self._get_group(board_copy, row, col)
        return self._count_liberties(board_copy, group) > 0

    def _make_move(self, board: List[List[str]], row: int, col: int, color: str) -> bool:
        """Make a move and update ko point. Returns True if move was valid.

        An invalid move leaves both the board and the ko point untouched.
        """
        if not self._is_valid_move(board, row, col, color):
            return False

        self._ko_point = None
        board[row][col] = color
        opponent = "O" if color == "X" else "X"
        captured_stones = []

        for dr, dc in DIRECTIONS:
            r, c = row + dr, col + dc
            if 0 <= r < len(board) and 0 <= c < len(board) and board[r][c] == opponent:
                group = self._get_group(board, r, c)
                if self._count_liberties(board, group) == 0:
                    captured_stones.extend(group)

        # Ko: a lone stone captured exactly one stone.
        if len(captured_stones) == 1 and len(self._get_group(board, row, col)) == 1:
            self._ko_point = captured_stones[0]

        for r, c in captured_stones:
            board[r][c] = "."

        return True

    def _generate_capture_problem(self, size: int, rng: Random) -> Tuple[List[List[str]], Tuple[int, int]]:
        """Generate a capture problem.

        Random stones are played first, then a white formation is placed on
        empty points and all of its liberties but one are filled with black
        stones.

        Returns:
            The board and the ``(row, col)`` of the remaining liberty.

        Raises:
            RuntimeError: if no formation could be placed within 50 tries.
        """
        board = [["." for _ in range(size)] for _ in range(size)]
        stones_placed = 0
        max_stones = self.config.max_stones - 4  # Reserve space for capture setup

        while stones_placed < max_stones:
            row = rng.randint(0, size - 1)
            col = rng.randint(0, size - 1)
            color = "X" if rng.random() < 0.5 else "O"
            # _make_move validates the move itself (including that the point is empty).
            if self._make_move(board, row, col, color):
                stones_placed += 1

        tries = 0
        formation_options = {
            "plus": {
                "white_offsets": [(0, 0), (-1, 0), (1, 0), (0, -1)],
                "forced_move_offset": (0, 1),
                "neighbor_offsets": [(0, 0), (-1, 0), (1, 0), (0, -1), (0, 1)],
            },
            "L": {
                "white_offsets": [(0, 0), (0, 1), (1, 0)],
                "forced_move_offset": (1, 1),
                "neighbor_offsets": [(0, 0), (0, 1), (1, 0), (1, 1)],
            },
            "T": {
                "white_offsets": [(0, -1), (0, 0), (0, 1), (1, 0)],
                "forced_move_offset": (-1, 0),
                "neighbor_offsets": [(0, -1), (0, 0), (0, 1), (1, 0), (-1, 0)],
            },
        }

        while tries < 50:
            row = rng.randint(1, size - 2)
            col = rng.randint(1, size - 2)
            formation_type = rng.choice(list(formation_options.keys()))
            formation = formation_options[formation_type]
            if all(board[row + dr][col + dc] == "." for dr, dc in formation["neighbor_offsets"]):
                # Place white stones according to chosen formation
                for dr, dc in formation["white_offsets"]:
                    board[row + dr][col + dc] = "O"
                forced_move = (row + formation["forced_move_offset"][0], col + formation["forced_move_offset"][1])
                white_group = {(row + dr, col + dc) for dr, dc in formation["white_offsets"]}
                extra_liberties = set()
                for r, c in white_group:
                    extra_liberties |= self._get_liberties(board, r, c)
                extra_liberties.discard(forced_move)
                for r, c in extra_liberties:
                    board[r][c] = "X"

                # Add decoy stone to enhance puzzle difficulty
                current_stone_count = sum(cell in "XO" for board_row in board for cell in board_row)
                if current_stone_count < self.config.max_stones + 7:
                    center = (row, col)  # using the base white stone as center
                    decoy_candidates = []
                    for i in range(center[0] - 2, center[0] + 3):
                        for j in range(center[1] - 2, center[1] + 3):
                            # Only points at Manhattan distance 2 from the center.
                            if abs(i - center[0]) + abs(j - center[1]) == 2:
                                if 0 <= i < size and 0 <= j < size and board[i][j] == "." and (i, j) != forced_move:
                                    decoy_candidates.append((i, j))
                    if decoy_candidates:
                        decoy_pos = rng.choice(decoy_candidates)
                        decoy_color = "X" if rng.random() < 0.5 else "O"
                        board[decoy_pos[0]][decoy_pos[1]] = decoy_color

                if self._is_valid_move(board, forced_move[0], forced_move[1], "X"):
                    return board, forced_move
            tries += 1
        raise RuntimeError("Failed to generate a capture problem")

    def _board_to_string(self, board: List[List[str]]) -> str:
        """Convert board to string representation"""
        size = len(board)
        # Column labels, indented by the width of the "NN " row prefix so they line up with the cells
        cols = "   " + " ".join(chr(ord("A") + i) for i in range(size)) + "\n"
        # Board with row numbers
        rows = [f"{size-i:2d} {' '.join(row)}" for i, row in enumerate(board)]
        return cols + "\n".join(rows)

    def __getitem__(self, idx: int) -> dict:
        """Generate a single Tsumego problem

        Returns:
            dict with:
            - "question": Problem description and board state
            - "answer": Solution move, e.g. "C4"
            - "metadata": Board size, board and solution move
        """
        rng = Random(self.seed + idx if self.seed is not None else None)
        size = rng.randint(self.config.min_board_size, self.config.max_board_size)

        # Start from a clean ko state so a failed earlier generation cannot influence this one.
        self._ko_point = None
        board, solution = self._generate_capture_problem(size, rng)
        board_str = self._board_to_string(board)
        solution_str = f"{chr(ord('A')+solution[1])}{size - solution[0]}"
        self._ko_point = None

        return {
            "question": (
                rng.choice(self._prompt_templates) + "\n\n" + board_str + "\n\n"
                "X - Black\n"
                "O - White\n\n"
                "Specify your move in coordinates (e.g. 'C4' for column C, row 4)"
            ),
            "answer": solution_str,
            "metadata": {"difficulty": {"board_size": size}, "board": board, "solution": solution_str},
        }

    @staticmethod
    def _parse_coordinate(text: str, board_size: int) -> Optional[Tuple[int, int]]:
        """Turn a move such as "C4" into a ``(row, col)`` board index, or None if malformed."""
        m = re.match(r"^([A-Za-z])(\d+)$", text)
        if not m:
            return None
        try:
            row = board_size - int(m.group(2))
        except ValueError:
            return None
        col = ord(m.group(1).upper()) - ord("A")
        return row, col

    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
        """Score the answer against the solution.

        Returns:
            0.0 for no answer, 1.0 for the solution move, 0.05 for any other
            point on the board, and 0.01 for an empty, malformed or off-board answer.
        """
        if answer is None:
            return 0.0
        answer = answer.strip()
        if not answer:
            return 0.01
        metadata = entry["metadata"]
        board_size = len(metadata["board"])

        # The solution is stored as a move string such as "C4"; a (row, col) pair is accepted as well.
        expected = metadata["solution"]
        if isinstance(expected, str):
            expected = self._parse_coordinate(expected.strip(), board_size)
        else:
            expected = tuple(expected)

        move = self._parse_coordinate(answer, board_size)
        if move is None:
            return 0.01
        if expected is not None and move == expected:
            return 1.0

        row, col = move
        if 0 <= row < board_size and 0 <= col < board_size:
            return 0.05
        return 0.01
+
+
# Register the dataset class together with its config under the name "tsumego".
register_dataset("tsumego", TsumegoDataset, TsumegoConfig)
diff --git a/reasoning_gym/logic/__init__.py b/reasoning_gym/logic/__init__.py
index dfa1c7ad..c05c4dba 100644
--- a/reasoning_gym/logic/__init__.py
+++ b/reasoning_gym/logic/__init__.py
@@ -4,6 +4,7 @@ Logic tasks for training reasoning capabilities.
from .aiw import AliceInWonderlandConfig, AliceInWonderlandDataset
from .propositional_logic import PropositionalLogicConfig, PropositionalLogicDataset
+from .self_reference import SelfReferenceConfig, SelfReferenceDataset
from .syllogisms import SyllogismConfig, SyllogismDataset, Term
from .zebra_puzzles import ZebraConfig, ZebraDataset
@@ -18,4 +19,6 @@ __all__ = [
"Term",
"ZebraConfig",
"ZebraDataset",
+ "SelfReferenceConfig",
+ "SelfReferenceDataset",
]
diff --git a/reasoning_gym/logic/self_reference.py b/reasoning_gym/logic/self_reference.py
new file mode 100644
index 00000000..d8155b4c
--- /dev/null
+++ b/reasoning_gym/logic/self_reference.py
@@ -0,0 +1,373 @@
+from dataclasses import dataclass
+from random import Random
+from typing import Any, Dict, Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+
+def is_prime(n):
+    """Report whether ``n`` is prime.
+
+    Values below 2 are never prime. Otherwise ``n`` is prime when no integer
+    from 2 up to ``int(n**0.5)`` inclusive divides it.
+    """
+    if n < 2:
+        return False
+    limit = int(n**0.5)
+    return all(n % divisor != 0 for divisor in range(2, limit + 1))
+
+
+def is_composite(n):
+    """Report whether ``n`` is composite: greater than 1 and not prime."""
+    if not (n > 1):
+        return False
+    return not is_prime(n)
+
+
+def generate_dynamic_puzzle(difficulty, rng):
+    """
+    Build a 7-statement self-referential puzzle around a planted truth count.
+
+    The statements always read as follows, with a, b, c and d chosen here:
+
+      1. "At least a of these 7 statements are true."
+      2. "At most b of these 7 statements are false."
+      3. "Exactly c of these 7 statements are true."
+      4. "Exactly d of these 7 statements are false."
+      5. "Either Statement 3 or Statement 4 is true, but not both."
+      6. "The number of true statements is a prime number."
+      7. "The number of false statements is a composite number."
+
+    A target count T of true statements is drawn from 1..6. The planted truth
+    values of statements 6 and 7 follow from T; the remaining true statements
+    are picked at random among statements 1-5. If more than five of those
+    would have to be true, a fixed T=4 layout is used instead. Parameters
+    a-d are then drawn so that statements 1-4 evaluate to their planted value
+    when exactly T statements are true.
+
+    Statement 5's planted value is not reconciled with statements 3 and 4, so
+    the planted assignment is not guaranteed to be a solution of the puzzle.
+
+    Args:
+        difficulty: Recorded in the result; the parameter choice itself does
+            not depend on it.
+        rng: Random source providing ``choice``, ``sample`` and ``randint``.
+
+    Returns:
+        dict with keys 'n' (always 7), 'statements_text' (7 strings),
+        'parameters' (a, b, c, d), 'intended_assignment' (7 booleans),
+        'intended_T' and 'difficulty'.
+    """
+    n = 7
+
+    # Planted number of true statements.
+    T = rng.choice(range(1, n))
+
+    # Statements 6 and 7 are determined by T alone.
+    intended6 = is_prime(T)
+    intended7 = is_composite(n - T)
+
+    # Number of statements among 1-5 that must be true to reach T overall.
+    k = T - (int(intended6) + int(intended7))
+    if 0 <= k <= 5:
+        first_five = [False] * 5
+        if k > 0:
+            for position in rng.sample(range(5), k):
+                first_five[position] = True
+    else:
+        # Not reachable with only five free statements: use the fixed layout.
+        T = 4
+        intended6 = False
+        intended7 = False
+        first_five = [True, True, True, True, False]
+
+    false_count = n - T
+
+    # Statement 1 holds exactly when T >= a.
+    if first_five[0]:
+        a_param = rng.randint(1, T)
+    elif T + 1 > n:
+        a_param = n
+    else:
+        a_param = rng.randint(T + 1, n)
+
+    # Statement 2 holds exactly when (n - T) <= b.
+    if first_five[1]:
+        b_param = rng.randint(false_count, n)
+    else:
+        b_param = rng.randint(0, max(false_count - 1, 0))
+
+    # Statement 3 holds exactly when T == c.
+    if first_five[2]:
+        c_param = T
+    else:
+        c_param = rng.choice([value for value in range(0, n + 1) if value != T])
+
+    # Statement 4 holds exactly when (n - T) == d.
+    if first_five[3]:
+        d_param = false_count
+    else:
+        d_param = rng.choice([value for value in range(0, n + 1) if value != false_count])
+
+    # By construction the planted values already sum to T, so statement 5
+    # keeps the value it was given above.
+    intended_assignment = first_five + [intended6, intended7]
+
+    statements_text = [
+        f"Statement 1: 'At least {a_param} of these 7 statements are true.'",
+        f"Statement 2: 'At most {b_param} of these 7 statements are false.'",
+        f"Statement 3: 'Exactly {c_param} of these 7 statements are true.'",
+        f"Statement 4: 'Exactly {d_param} of these 7 statements are false.'",
+        "Statement 5: 'Either Statement 3 or Statement 4 is true, but not both.'",
+        "Statement 6: 'The number of true statements is a prime number.'",
+        "Statement 7: 'The number of false statements is a composite number.'",
+    ]
+
+    return {
+        "n": n,
+        "statements_text": statements_text,
+        "parameters": {
+            "a": a_param,
+            "b": b_param,
+            "c": c_param,
+            "d": d_param,
+        },
+        "intended_assignment": intended_assignment,
+        "intended_T": T,
+        "difficulty": difficulty,
+    }
+
+
+def verify_solution_dynamic(puzzle, solution):
+    """
+    Check whether a truth assignment is self-consistent for a generated puzzle.
+
+    An assignment is consistent when every statement marked true makes a claim
+    that holds, and every statement marked false makes a claim that fails.
+    With T true and F false statements the claims are:
+
+      1. T >= a          2. F <= b          3. T == c          4. F == d
+      5. solution[2] != solution[3]
+      6. is_prime(T)     7. is_composite(F)
+
+    Parameters:
+        puzzle (dict): Puzzle as returned by generate_dynamic_puzzle.
+        solution (list of bool): Candidate assignment, one entry per statement.
+
+    Returns:
+        bool: True if the candidate is self-consistent; False otherwise,
+        including when its length differs from puzzle["n"].
+    """
+    n = puzzle["n"]
+    if len(solution) != n:
+        return False
+
+    T = sum(solution)
+    F = n - T
+    params = puzzle["parameters"]
+
+    # Whether each statement's claim actually holds, in statement order.
+    claims = (
+        T >= params["a"],
+        F <= params["b"],
+        T == params["c"],
+        F == params["d"],
+        solution[2] != solution[3],
+        is_prime(T),
+        is_composite(F),
+    )
+    return all(bool(solution[index]) == holds for index, holds in enumerate(claims))
+
+
+def print_puzzle_dynamic(puzzle):
+    """Return the puzzle's statements as text, one " - " bullet per line (nothing is printed)."""
+    return "".join(" - " + stmt + "\n" for stmt in puzzle["statements_text"])
+
+
+def solve_puzzle_dynamic(puzzle):
+    """
+    Enumerate every truth assignment of the puzzle's statements and return,
+    in enumeration order, those that verify_solution_dynamic accepts.
+    """
+    n = puzzle["n"]
+    # Bit j of the counter gives the truth value of statement j + 1.
+    candidates = ([(mask >> bit) & 1 == 1 for bit in range(n)] for mask in range(2**n))
+    return [candidate for candidate in candidates if verify_solution_dynamic(puzzle, candidate)]
+
+
+@dataclass
+class SelfReferenceConfig:
+    """Settings for generating self-referential statement puzzles"""
+
+    difficulty: int = 5  # must lie in 1..10, see validate()
+    seed: Optional[int] = None  # handed to the dataset base class as its seed
+    size: int = 500  # handed to the dataset base class as its size
+
+    def validate(self):
+        """Assert that ``difficulty`` lies in the inclusive range 1..10"""
+        in_range = 1 <= self.difficulty <= 10
+        assert in_range, "difficulty must be between 1 and 10"
+
+
+class SelfReferenceDataset(ProceduralDataset):
+    """Generates self-referential puzzles"""
+
+    def __init__(self, config: SelfReferenceConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single SelfReference task
+
+        The puzzle is generated from ``Random(self.seed + idx)``, so the same
+        seed and index always produce the same item.
+
+        Returns:
+            dict with keys:
+                - question: str, the task description
+                - answer: str, the number of self-consistent truth assignments
+                - metadata: dict (currently empty)
+        """
+        rng = Random(self.seed + idx)
+
+        # Generate puzzle
+        puzzle = generate_dynamic_puzzle(self.config.difficulty, rng)
+        puzz_s = (
+            "Given the truthfulness of these statements, please tell me the number of possible solutions: \n"
+            + print_puzzle_dynamic(puzzle)
+        )
+
+        # The answer is the count of consistent assignments, returned as text
+        # to match the documented contract and the scorer's signature.
+        answer = str(len(solve_puzzle_dynamic(puzzle)))
+
+        return {
+            "question": puzz_s,
+            "answer": answer,
+            "metadata": {},
+        }
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, Any]) -> float:
+        """Determine if the solution provided solves the SelfReference task.
+
+        Both sides are compared as strings, so an integer answer or an integer
+        stored in the entry is accepted; surrounding whitespace in the given
+        answer is ignored.
+
+        Args:
+            answer (Optional[str]): The user's answer.
+            entry (Dict[str, Any]): The original dataset entry containing the correct answer.
+
+        Returns:
+            float: 0.0 when no answer is given, 1.0 for a matching answer,
+            0.1 for any other answer.
+        """
+        if answer is None:
+            return 0.0
+        if str(answer).strip() == str(entry["answer"]):
+            return 1.0
+        return 0.1
+
+
+register_dataset("self_reference", SelfReferenceDataset, SelfReferenceConfig)
diff --git a/reasoning_gym/logic/syllogisms.py b/reasoning_gym/logic/syllogisms.py
index a5bbb219..37b87a6f 100644
--- a/reasoning_gym/logic/syllogisms.py
+++ b/reasoning_gym/logic/syllogisms.py
@@ -22,23 +22,21 @@ class Term:
self.name = name
self.plural = plural
+ def __repr__(self) -> str:
+ """Return string representation of the term"""
+ return f"Term({self.name}, {self.plural})"
+
@dataclass
class SyllogismConfig:
"""Configuration for syllogism task generation"""
- # Lists of terms to use in syllogisms
- terms: List[Term] = None # Will be populated with defaults if None
-
# Control which quantifiers to use
allow_all: bool = True
allow_no: bool = True
allow_some: bool = True
allow_some_not: bool = True
- # Whether to include invalid syllogisms as negative examples
- include_invalid: bool = True
-
# Percentage of invalid examples if included (0.0 to 1.0)
invalid_ratio: float = 0.3
@@ -101,7 +99,7 @@ class SyllogismDataset(ProceduralDataset):
def __init__(self, config: SyllogismConfig):
super().__init__(config=config, seed=config.seed, size=config.size)
- self.terms = self.DEFAULT_TERMS if config.terms is None else config.terms
+ self.terms = self.DEFAULT_TERMS
def _get_allowed_quantifiers(self) -> List[Quantifier]:
"""Get list of allowed quantifiers based on config"""
@@ -116,95 +114,126 @@ class SyllogismDataset(ProceduralDataset):
quantifiers.append(Quantifier.SOME_NOT)
return quantifiers
+ @staticmethod
def _is_valid_syllogism(
- self,
- premise1: Tuple[Quantifier, Term, Term],
- premise2: Tuple[Quantifier, Term, Term],
- conclusion: Tuple[Quantifier, Term, Term],
+ premise1: Tuple[Quantifier, "Term", "Term"],
+ premise2: Tuple[Quantifier, "Term", "Term"],
+ conclusion: Tuple[Quantifier, "Term", "Term"],
) -> bool:
"""
- Check if a syllogism is logically valid using classical logic rules.
-
- Rules implemented:
- 1. Universal Affirmative (ALL):
- - If both premises are ALL, conclusion must be ALL
- - ALL A are B + ALL B are C → ALL A are C (Barbara)
-
- 2. Universal Negative (NO):
- - If one premise is NO and other is ALL, conclusion must be NO
- - NO A are B + ALL C are B → NO A are C (Celarent)
- - ALL A are B + NO C are B → NO A are C (Cesare)
-
- 3. Particular Affirmative (SOME):
- - If one premise is SOME and other is ALL, conclusion must be SOME
- - SOME A are B + ALL B are C → SOME A are C (Darii)
- - ALL A are B + SOME C are B → SOME A are C (Disamis)
-
- 4. Particular Negative (SOME_NOT):
- - If one premise is SOME_NOT and other is ALL, conclusion can be SOME_NOT
- - SOME A are not B + ALL B are C → SOME A are not C (Ferio)
- - ALL A are B + SOME C are not B → SOME A are not C (Festino)
-
- 5. Invalid combinations:
- - Two negative premises never yield a valid conclusion
- - Two particular premises never yield a valid conclusion
- - If both premises are particular, no valid conclusion
- - If conclusion is universal but either premise is particular, invalid
+ Checks whether a given syllogism is valid under classical (Aristotelian) rules,
+ including the distribution rule:
+ - If a term is distributed in the conclusion, it must be distributed
+ in the premise where it appears as subject/predicate.
"""
- q1, t1_1, t1_2 = premise1
- q2, t2_1, t2_2 = premise2
- qc, tc_1, tc_2 = conclusion
- # Rule 5: Two negative premises -> invalid
- if q1 in (Quantifier.NO, Quantifier.SOME_NOT) and q2 in (Quantifier.NO, Quantifier.SOME_NOT):
+ # --- 1) Extract data ---
+ q1, p1_subj, p1_pred = premise1
+ q2, p2_subj, p2_pred = premise2
+ q3, c_subj, c_pred = conclusion
+
+ negative_set = {Quantifier.NO, Quantifier.SOME_NOT}
+ particular_set = {Quantifier.SOME, Quantifier.SOME_NOT}
+ universal_set = {Quantifier.ALL, Quantifier.NO}
+
+ # --- 2) Identify a unique middle term ---
+ premise1_terms = {p1_subj, p1_pred}
+ premise2_terms = {p2_subj, p2_pred}
+ common_terms = premise1_terms.intersection(premise2_terms)
+
+ if len(common_terms) != 1:
+ return False
+ middle_term = next(iter(common_terms))
+
+ # Gather all terms => must be exactly 3 distinct terms
+ all_terms = premise1_terms.union(premise2_terms)
+ if len(all_terms) != 3:
return False
- # Rule 5: Two particular premises -> invalid
- if q1 in (Quantifier.SOME, Quantifier.SOME_NOT) and q2 in (Quantifier.SOME, Quantifier.SOME_NOT):
+ # The conclusion must use the other two terms (not the middle)
+ other_two = all_terms - {middle_term}
+ conclusion_terms = {c_subj, c_pred}
+ if conclusion_terms != other_two:
return False
- # Rule 5: Universal conclusion with particular premise -> invalid
- if qc in (Quantifier.ALL, Quantifier.NO) and (
- q1 in (Quantifier.SOME, Quantifier.SOME_NOT) or q2 in (Quantifier.SOME, Quantifier.SOME_NOT)
- ):
+ # --- 3) Identify which premise is major vs. minor ---
+ def premise_contains(premise, term):
+ return (premise[1] == term) or (premise[2] == term)
+
+ if premise_contains(premise1, c_pred):
+ major = premise1
+ minor = premise2
+ elif premise_contains(premise2, c_pred):
+ major = premise2
+ minor = premise1
+ else:
return False
- # Rule 1: Barbara syllogism
- if q1 == Quantifier.ALL and q2 == Quantifier.ALL:
- if t1_2 == t2_1 and tc_1 == t1_1 and tc_2 == t2_2:
- return qc == Quantifier.ALL
+ # The minor premise must contain the conclusion's subject
+ if not premise_contains(minor, c_subj):
+ return False
- # Rule 2: Celarent syllogism
- if q1 == Quantifier.NO and q2 == Quantifier.ALL:
- if t1_2 == t2_1 and tc_1 == t1_1 and tc_2 == t2_2:
- return qc == Quantifier.NO
+ # --- 4) Quick checks (traditional “no two negative,” etc.) ---
+ if (q1 in negative_set) and (q2 in negative_set):
+ return False
+ if (q1 in particular_set) and (q2 in particular_set):
+ return False
+ if q3 in universal_set:
+ if (q1 in particular_set) or (q2 in particular_set):
+ return False
+ if q3 in negative_set:
+ if not ((q1 in negative_set) or (q2 in negative_set)):
+ return False
- # Rule 2: Cesare syllogism
- if q1 == Quantifier.ALL and q2 == Quantifier.NO:
- if t1_2 == t2_1 and tc_1 == t1_1 and tc_2 == t2_2:
- return qc == Quantifier.NO
+ # --- 5) Distribution checks ---
+ def distribution(q: Quantifier):
+ if q == Quantifier.ALL: # A
+ return (True, False)
+ elif q == Quantifier.NO: # E
+ return (True, True)
+ elif q == Quantifier.SOME: # I
+ return (False, False)
+ elif q == Quantifier.SOME_NOT: # O
+ return (False, True)
+ else:
+ raise ValueError(f"Unknown quantifier: {q}")
- # Rule 3: Darii syllogism
- if q1 == Quantifier.SOME and q2 == Quantifier.ALL:
- if t1_2 == t2_1 and tc_1 == t1_1 and tc_2 == t2_2:
- return qc == Quantifier.SOME
+ # Conclusion distribution
+ dist_c_subj, dist_c_pred = distribution(q3)
- # Rule 3: Disamis syllogism
- if q1 == Quantifier.ALL and q2 == Quantifier.SOME:
- if t1_2 == t2_1 and tc_1 == t1_1 and tc_2 == t2_2:
- return qc == Quantifier.SOME
+ # Major premise distribution
+ q_major, major_subj, major_pred = major
+ dist_major_subj, dist_major_pred = distribution(q_major)
- # Rule 4: Ferio syllogism
- if q1 == Quantifier.SOME_NOT and q2 == Quantifier.ALL:
- if t1_2 == t2_1 and tc_1 == t1_1 and tc_2 == t2_2:
- return qc == Quantifier.SOME_NOT
+ # Minor premise distribution
+ q_minor, minor_subj, minor_pred = minor
+ dist_minor_subj, dist_minor_pred = distribution(q_minor)
- # Rule 4: Festino syllogism
- if q1 == Quantifier.ALL and q2 == Quantifier.SOME_NOT:
- if t1_2 == t2_1 and tc_1 == t1_1 and tc_2 == t2_2:
- return qc == Quantifier.SOME_NOT
+ # If the conclusion's subject is distributed, check it in the minor premise
+ if dist_c_subj:
+ if c_subj == minor_subj:
+ if not dist_minor_subj:
+ return False
+ elif c_subj == minor_pred:
+ if not dist_minor_pred:
+ return False
- return False
+ # If the conclusion's predicate is distributed, check it in the major premise
+ if dist_c_pred:
+ if c_pred == major_subj:
+ if not dist_major_subj:
+ return False
+ elif c_pred == major_pred:
+ if not dist_major_pred:
+ return False
+
+ # If either premise is negative, the conclusion must be negative.
+ if (q1 in negative_set) or (q2 in negative_set):
+ if q3 not in negative_set:
+ return False
+
+ # If all checks pass, it's valid
+ return True
def _format_quantifier_statement(self, quantifier: Quantifier, subject: Term, predicate: Term) -> str:
"""Format a quantified statement in natural language"""
@@ -219,18 +248,29 @@ class SyllogismDataset(ProceduralDataset):
terms = rng.sample(self.terms, 3)
quantifiers = self._get_allowed_quantifiers()
- # Generate premises and conclusion
- premise1 = (rng.choice(quantifiers), terms[0], terms[1])
- premise2 = (rng.choice(quantifiers), terms[1], terms[2])
- conclusion = (rng.choice(quantifiers), terms[0], terms[2])
+ target_valid = rng.random() > self.config.invalid_ratio # Invert ratio to match meaning
+ max_attempts = 100
+ attempts = 0
- # Decide if this should be a valid or invalid syllogism
- is_valid = True
- if self.config.include_invalid and rng.random() < self.config.invalid_ratio:
- is_valid = False
- # If should be invalid, regenerate conclusion until invalid
- while self._is_valid_syllogism(premise1, premise2, conclusion):
- conclusion = (rng.choice(quantifiers), terms[0], terms[2])
+ while attempts < max_attempts:
+ # Generate premises and conclusion
+ premise1 = (rng.choice(quantifiers), terms[0], terms[1])
+ premise2 = (rng.choice(quantifiers), terms[1], terms[2])
+ conclusion = (rng.choice(quantifiers), terms[0], terms[2])
+
+ # Check if validity matches target
+ is_valid = self._is_valid_syllogism(premise1, premise2, conclusion)
+ if is_valid == target_valid:
+ break
+
+ attempts += 1
+
+ if attempts >= max_attempts:
+ # If we couldn't find a matching syllogism, return a basic valid one
+ premise1 = (Quantifier.ALL, terms[0], terms[1])
+ premise2 = (Quantifier.ALL, terms[1], terms[2])
+ conclusion = (Quantifier.ALL, terms[0], terms[2])
+ is_valid = True
# Format the syllogism as text
premise1_text = self._format_quantifier_statement(premise1[0], premise1[1], premise1[2])
diff --git a/tests/test_complex_arithmetic.py b/tests/test_complex_arithmetic.py
new file mode 100644
index 00000000..0d369fc1
--- /dev/null
+++ b/tests/test_complex_arithmetic.py
@@ -0,0 +1,90 @@
+import pytest
+
+from reasoning_gym.algebra.complex_arithmetic import ComplexArithmeticConfig, ComplexArithmeticDataset
+
+
+def test_complex_arithmetic_basic():
+    """Check dataset length plus the structure and types of the first item."""
+    dataset = ComplexArithmeticDataset(
+        ComplexArithmeticConfig(
+            min_real=-5, max_real=5, min_imag=-5, max_imag=5, operations=("+", "-", "*", "/"), seed=42, size=10
+        )
+    )
+
+    print(dataset)
+
+    # Dataset size follows the configured size
+    assert len(dataset) == 10
+
+    # Top-level structure of one item
+    item = dataset[0]
+    expected_types = {"question": str, "answer": str, "metadata": dict}
+    for key in expected_types:
+        assert key in item
+    for key, expected_type in expected_types.items():
+        assert isinstance(item[key], expected_type)
+
+    # Metadata structure
+    metadata = item["metadata"]
+    for key in ("num1", "num2", "operation", "result"):
+        assert key in metadata
+
+    # Operands are (real, imag) pairs; the result is a tuple as well
+    assert isinstance(metadata["num1"], tuple)
+    assert isinstance(metadata["num2"], tuple)
+    assert len(metadata["num1"]) == 2
+    assert len(metadata["num2"]) == 2
+    assert isinstance(metadata["operation"], str)
+    assert isinstance(metadata["result"], tuple)
+
+    # The answer string must parse back to the complex value held in metadata
+    parsed = ComplexArithmeticDataset.parse_string_to_complex(item["answer"])
+    assert parsed == complex(*metadata["result"])
+
+
+def test_complex_arithmetic_scoring():
+    """Exercise score_answer with exact, near, distant and malformed answers."""
+    dataset = ComplexArithmeticDataset(ComplexArithmeticConfig(seed=42))
+
+    # Reference value 3 + 2i
+    metadata = {"result": (3.0, 2.0)}
+
+    def score(text):
+        return dataset.score_answer(text, metadata)
+
+    # Exact matches in several spellings score 1.0
+    for exact in ("3 + 2i", "3+2i", "3.0 + 2.0i"):
+        assert score(exact) == 1.0
+
+    # Small errors score high but below 1.0
+    print(score("3.1 + 2i"))
+    assert 0.9 < score("3.1 + 2i") < 1.0
+    assert 0.9 < score("3 + 2.1i") < 1.0
+    assert 0.7 < score("3.1 + 2.1i") < 0.95
+
+    # Moderate errors score in the middle
+    for moderate in ("4 + 2i", "3 + 3i"):
+        assert 0.3 < score(moderate) < 0.4
+
+    # Large errors score close to zero
+    assert score("10 + 10i") < 0.01
+
+    # Invalid answers score exactly 0.0
+    for invalid in ("invalid", None, "inf + 2i"):
+        assert score(invalid) == 0.0
+
+
+def test_complex_arithmetic_division_by_zero():
+    """Check that division-only items never use a zero divisor."""
+    dataset = ComplexArithmeticDataset(ComplexArithmeticConfig(operations=("/",), seed=42))
+
+    # Inspect the first ten items
+    for index in range(10):
+        divisor = complex(*dataset[index]["metadata"]["num2"])
+        assert divisor != 0
diff --git a/tests/test_isomorphic_strings.py b/tests/test_isomorphic_strings.py
new file mode 100644
index 00000000..6e515cf7
--- /dev/null
+++ b/tests/test_isomorphic_strings.py
@@ -0,0 +1,108 @@
+"""Tests for Isomorphic Strings questions generation"""
+
+import json
+
+import pytest
+
+from reasoning_gym.algorithmic.isomorphic_strings import IsomorphicStringsConfig, IsomorphicStringsDataset
+
+
+def test_isomorphic_strings_config_validation():
+    """Test that each invalid setting makes validation raise AssertionError"""
+    invalid_settings = (
+        {"max_string_length": -1},  # negative not allowed
+        {"max_string_length": 0},  # zero not allowed
+        {"max_string_length": 1},  # one not allowed
+        {"p_solvable": -0.01},  # below 0 not allowed
+        {"p_solvable": 1.01},  # above 1 not allowed
+    )
+    for overrides in invalid_settings:
+        with pytest.raises(AssertionError):
+            IsomorphicStringsConfig(**overrides).validate()
+
+
+def test_isomorphic_strings_dataset_deterministic():
+    """Test that two datasets built from one config yield equal items"""
+    config = IsomorphicStringsConfig(seed=42, size=10)
+    first = IsomorphicStringsDataset(config)
+    second = IsomorphicStringsDataset(config)
+
+    for index in range(len(first)):
+        assert first[index] == second[index]
+
+
+def test_isomorphic_strings_dataset_items():
+    """Test structure and metadata of every generated item"""
+    dataset = IsomorphicStringsDataset(IsomorphicStringsConfig(max_string_length=10, size=10, seed=42))
+
+    for index in range(len(dataset)):
+        item = dataset[index]
+
+        # Item structure
+        assert isinstance(item, dict)
+        for key in ("question", "answer", "metadata"):
+            assert key in item
+
+        # Metadata keys
+        metadata = item["metadata"]
+        for key in ("words", "solution", "solvable"):
+            assert key in metadata
+
+        # Two words, boolean flags, and the flags agree with each other
+        assert len(metadata["words"]) == 2
+        assert metadata["solution"] in {True, False}
+        assert metadata["solvable"] in {True, False}
+        assert metadata["solution"] == metadata["solvable"]
+
+
+def test_isomorphic_strings_dataset_iteration():
+    """Test that iteration yields config.size items and is repeatable"""
+    config = IsomorphicStringsConfig(size=5, seed=42)
+    dataset = IsomorphicStringsDataset(config)
+
+    first_pass = list(dataset)
+    assert len(first_pass) == config.size
+
+    # A second pass must produce the same items
+    second_pass = list(dataset)
+    assert first_pass == second_pass
+
+
+def test_isomorphic_strings_answer():
+    """Test the _check_isomorphic method"""
+    dataset = IsomorphicStringsDataset(IsomorphicStringsConfig(seed=42))
+
+    cases = (
+        ("foo", "bar", False),  # general case, not isomorphic
+        ("foo", "baa", True),  # general case, isomorphic
+        ("foo", "bo", False),  # unequal lengths
+        ("", "", True),  # empty strings
+    )
+    for s, t, expected in cases:
+        assert dataset._check_isomorphic(s, t) == expected
diff --git a/tests/test_polynomial_multiplication.py b/tests/test_polynomial_multiplication.py
new file mode 100644
index 00000000..a27bd6bf
--- /dev/null
+++ b/tests/test_polynomial_multiplication.py
@@ -0,0 +1,166 @@
+import pytest
+import sympy as sp
+
+from reasoning_gym import create_dataset
+from reasoning_gym.algebra.polynomial_multiplication import (
+ PolynomialMultiplicationConfig,
+ PolynomialMultiplicationDataset,
+)
+
+
+def test_polynomial_config_validation():
+    """Test that each invalid setting makes validation raise AssertionError"""
+    invalid_settings = (
+        {"min_terms": 0},
+        {"min_value": 0},
+        {"min_degree": 0, "max_degree": 3},
+        {"min_degree": 4, "max_degree": 3},
+        {"operators": ("^",)},
+        {"min_polynomials": 5, "max_polynomials": 2},
+    )
+    for overrides in invalid_settings:
+        with pytest.raises(AssertionError):
+            PolynomialMultiplicationConfig(**overrides).validate()
+
+
+def test_polynomial_multiplication_dataset_basic():
+    """Test that the dataset reports the configured size"""
+    dataset_size = 50
+    settings = dict(
+        min_terms=2,
+        max_terms=3,
+        min_value=1,
+        max_value=5,
+        min_degree=1,
+        max_degree=2,
+        min_polynomials=2,
+        max_polynomials=3,
+        single_variable=True,
+        seed=42,
+        size=dataset_size,
+    )
+    dataset = PolynomialMultiplicationDataset(PolynomialMultiplicationConfig(**settings))
+
+    assert len(dataset) == dataset_size
+
+
+def test_polynomial_equations_dataset_items():
+    """Test that generated items carry the expected keys and a parseable expression"""
+    settings = dict(
+        min_terms=2,
+        max_terms=3,
+        min_value=1,
+        max_value=5,
+        min_degree=1,
+        max_degree=2,
+        min_polynomials=2,
+        max_polynomials=5,
+        single_variable=False,
+        size=3,
+        seed=100,
+    )
+    ds = create_dataset("polynomial_multiplication", **settings)
+
+    for item in ds:
+        for key in ("question", "answer", "metadata"):
+            assert key in item
+
+        # Metadata field types
+        metadata = item["metadata"]
+        assert isinstance(metadata["polynomial_expr"], str)
+        assert isinstance(metadata["single_variable"], bool)
+
+        # The stored expression must be parseable by sympy
+        sp.sympify(metadata["polynomial_expr"])
+
+
+def test_polynomial_equations_dataset_deterministic():
+    """Test that two datasets built from one seeded config yield equal items."""
+    cfg = PolynomialMultiplicationConfig(seed=999, size=3)
+    first = PolynomialMultiplicationDataset(cfg)
+    second = PolynomialMultiplicationDataset(cfg)
+
+    for index in range(len(first)):
+        assert first[index] == second[index], "Polynomial datasets with same seed should match exactly."
+
+
+def test_polynomial_solutions_evaluation():
+    """Test that each answer equals the expanded form of its stored expression."""
+    settings = dict(
+        min_terms=2,
+        max_terms=4,
+        min_value=1,
+        max_value=10,
+        min_degree=1,
+        max_degree=3,
+        min_polynomials=2,
+        max_polynomials=5,
+        single_variable=False,
+        size=5,
+        seed=42,
+    )
+    ds = create_dataset("polynomial_multiplication", **settings)
+
+    for item in ds:
+        # Expand the stored product and compare it with the item's answer
+        expanded = sp.expand(item["metadata"]["polynomial_expr"])
+        assert expanded == item["answer"]
+
+
+def test_score_function():
+    """Test score_answer on a single-variable item: missing, exact, unparseable and wrong answers."""
+    settings = dict(
+        min_terms=2,
+        max_terms=4,
+        min_value=1,
+        max_value=10,
+        min_degree=1,
+        max_degree=3,
+        min_polynomials=2,
+        max_polynomials=5,
+        single_variable=True,
+        size=1,
+        seed=42,
+    )
+    ds = create_dataset("polynomial_multiplication", **settings)
+
+    expectations = (
+        (None, 0.00),
+        ("6*x**4 + 9*x**3 - 6*x**2 - 39*x - 45", 1),
+        ("Not a polynomial", 0.01),
+        ("x**4", 0.05),
+    )
+    for candidate, expected_score in expectations:
+        assert ds.score_answer(candidate, ds[0]["metadata"]) == expected_score
+
+
+def test_multivariate_score_function():
+    """Test score_answer on a multi-variable item: missing, exact, unparseable and wrong answers."""
+    settings = dict(
+        min_terms=2,
+        max_terms=4,
+        min_value=1,
+        max_value=10,
+        min_degree=1,
+        max_degree=3,
+        min_polynomials=2,
+        max_polynomials=5,
+        single_variable=False,
+        size=1,
+        seed=42,
+    )
+    ds = create_dataset("polynomial_multiplication", **settings)
+
+    expectations = (
+        (None, 0.00),
+        ("-27*a**3*c - 27*a**3 + 144*a*c + 144*a", 1),
+        ("Not a polynomial", 0.01),
+        ("x**4", 0.05),
+    )
+    for candidate, expected_score in expectations:
+        assert ds.score_answer(candidate, ds[0]["metadata"]) == expected_score
diff --git a/tests/test_self_reference.py b/tests/test_self_reference.py
new file mode 100644
index 00000000..66f15081
--- /dev/null
+++ b/tests/test_self_reference.py
@@ -0,0 +1,55 @@
+import pytest
+
+from reasoning_gym.logic.self_reference import SelfReferenceConfig, SelfReferenceDataset
+
+
+def test_self_reference():
+ """Test basic properties and solution of generated items"""
+
+ # Easy
+ config = SelfReferenceConfig(seed=42, size=20, difficulty=1)
+ dataset = SelfReferenceDataset(config)
+
+ for item in dataset:
+ assert isinstance(item, dict)
+ assert "question" in item
+ assert "answer" in item
+ assert "metadata" in item
+
+ # Test the scoring
+ assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+ assert dataset.score_answer(answer=99, entry=item) == 0.1
+ assert dataset.score_answer(answer="99", entry=item) == 0.1
+ assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+ # Medium
+ config = SelfReferenceConfig(seed=42, size=1, difficulty=5)
+ dataset = SelfReferenceDataset(config)
+
+ for item in dataset:
+ assert isinstance(item, dict)
+ assert "question" in item
+ assert "answer" in item
+ assert "metadata" in item
+
+ # Test the scoring
+ assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+ assert dataset.score_answer(answer=99, entry=item) == 0.1
+ assert dataset.score_answer(answer="99", entry=item) == 0.1
+ assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+ # Hard
+ config = SelfReferenceConfig(seed=42, size=1, difficulty=10)
+ dataset = SelfReferenceDataset(config)
+
+ for item in dataset:
+ assert isinstance(item, dict)
+ assert "question" in item
+ assert "answer" in item
+ assert "metadata" in item
+
+ # Test the scoring
+ assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+ assert dataset.score_answer(answer=99, entry=item) == 0.1
+ assert dataset.score_answer(answer="99", entry=item) == 0.1
+ assert dataset.score_answer(answer=None, entry=item) == 0.0
diff --git a/tests/test_sokoban.py b/tests/test_sokoban.py
new file mode 100644
index 00000000..c4d1e2b8
--- /dev/null
+++ b/tests/test_sokoban.py
@@ -0,0 +1,50 @@
+import pytest
+
+from reasoning_gym.games.sokoban import SokobanConfig, SokobanDataset
+
+
+def test_sokoban():
+ """Test basic properties and solution of generated items"""
+
+ # Easy
+ config = SokobanConfig(seed=42, size=20)
+ dataset = SokobanDataset(config)
+
+ for item in dataset:
+ assert isinstance(item, dict)
+ assert "question" in item
+ assert "answer" in item
+ assert "metadata" in item
+
+ # Test the scoring
+ assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+ assert dataset.score_answer(answer="RU", entry=item) == 0.1
+ assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+ # Medium
+ config = SokobanConfig(seed=42, min_h=40, max_h=50, min_w=40, max_w=50, min_boxes=20, max_boxes=30, size=3)
+ dataset = SokobanDataset(config)
+
+ for item in dataset:
+ assert isinstance(item, dict)
+ assert "question" in item
+ assert "answer" in item
+ assert "metadata" in item
+
+ # Test the scoring
+ assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+ assert dataset.score_answer(answer=None, entry=item) == 0.0
+
+ # Hard
+ config = SokobanConfig(seed=42, min_h=400, max_h=500, min_w=400, max_w=500, min_boxes=50, max_boxes=50, size=1)
+ dataset = SokobanDataset(config)
+
+ for item in dataset:
+ assert isinstance(item, dict)
+ assert "question" in item
+ assert "answer" in item
+ assert "metadata" in item
+
+ # Test the scoring
+ assert dataset.score_answer(answer=item["answer"], entry=item) == 1.0
+ assert dataset.score_answer(answer=None, entry=item) == 0.0
diff --git a/tests/test_syllogisms.py b/tests/test_syllogisms.py
index 498be586..9f2c5607 100644
--- a/tests/test_syllogisms.py
+++ b/tests/test_syllogisms.py
@@ -64,6 +64,204 @@ def test_syllogism_dataset_items():
assert "Does it logically follow that:" in item["question"]
+def test_valid_syllogism_forms():
+ """Test specific valid syllogistic forms"""
+ config = SyllogismConfig(size=1, seed=42)
+ dataset = SyllogismDataset(config)
+
+ # Create some test terms
+ A = Term("mortal", "mortals")
+ B = Term("human", "humans")
+ C = Term("animal", "animals")
+
+ # Test Barbara (AAA-1)
+ # Major premise: All M are P
+ # Minor premise: All S are M
+ # Conclusion: All S are P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.ALL, B, C), # All B (M) are C (P)
+ (Quantifier.ALL, A, B), # All A (S) are B (M)
+ (Quantifier.ALL, A, C), # All A (S) are C (P)
+ )
+
+ # Test Celarent (EAE-1)
+ # Major premise: No M are P
+ # Minor premise: All S are M
+ # Conclusion: No S are P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.NO, B, C), # No B (M) are C (P)
+ (Quantifier.ALL, A, B), # All A (S) are B (M)
+ (Quantifier.NO, A, C), # No A (S) are C (P)
+ )
+
+ # Test Cesare (EAE-2) — corrected order
+ # Major premise: No P are M
+ # Minor premise: All S are M
+ # Conclusion: No S are P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.NO, C, B), # No C (P) are B (M) [Major premise]
+ (Quantifier.ALL, A, B), # All A (S) are B (M) [Minor premise]
+ (Quantifier.NO, A, C), # No A (S) are C (P)
+ )
+
+ # Test Darii (AII-1)
+ # Major premise: All M are P
+ # Minor premise: Some S are M
+ # Conclusion: Some S are P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.ALL, B, C), # All B (M) are C (P)
+ (Quantifier.SOME, A, B), # Some A (S) are B (M)
+ (Quantifier.SOME, A, C), # Some A (S) are C (P)
+ )
+
+ # Test Disamis (IAI-3)
+ # Major premise: Some M are P
+ # Minor premise: All M are S
+ # Conclusion: Some S are P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.SOME, B, C), # Some B (M) are C (P)
+ (Quantifier.ALL, B, A), # All B (M) are A (S)
+ (Quantifier.SOME, A, C), # Some A (S) are C (P)
+ )
+
+ # Test Ferio (EIO-1)
+ # Major premise: No M are P
+ # Minor premise: Some S are M
+ # Conclusion: Some S are not P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.NO, B, C), # No B (M) are C (P)
+ (Quantifier.SOME, A, B), # Some A (S) are B (M)
+ (Quantifier.SOME_NOT, A, C), # Some A (S) are not C (P)
+ )
+
+ # Test Festino (EIO-2)
+ # Major premise: No P are M
+ # Minor premise: Some S are M
+ # Conclusion: Some S are not P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.NO, C, B), # No C (P) are B (M)
+ (Quantifier.SOME, A, B), # Some A (S) are B (M)
+ (Quantifier.SOME_NOT, A, C), # Some A (S) are not C (P)
+ )
+
+ # Test Datisi (AII-3)
+ # Major premise: All M are P
+ # Minor premise: Some M are S
+ # Conclusion: Some S are P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.ALL, B, C), # All B (M) are C (P)
+ (Quantifier.SOME, B, A), # Some B (M) are A (S)
+ (Quantifier.SOME, A, C), # Some A (S) are C (P)
+ )
+
+ # Test Bocardo (OAO-3)
+ # Major premise: Some M are not P
+ # Minor premise: All M are S
+ # Conclusion: Some S are not P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.SOME_NOT, B, C), # Some B (M) are not C (P)
+ (Quantifier.ALL, B, A), # All B (M) are A (S)
+ (Quantifier.SOME_NOT, A, C), # Some A (S) are not C (P)
+ )
+
+ # Test Baroco (AOO-2)
+ # Major premise: All P are M
+ # Minor premise: Some S are not M
+ # Conclusion: Some S are not P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.ALL, C, B), # All C (P) are B (M)
+ (Quantifier.SOME_NOT, A, B), # Some A (S) are not B (M)
+ (Quantifier.SOME_NOT, A, C), # Some A (S) are not C (P)
+ )
+
+ # Test Camestres (AEE-2)
+ # Major premise: All P are M
+ # Minor premise: No S are M
+ # Conclusion: No S are P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.ALL, C, B), # All C (P) are B (M)
+ (Quantifier.NO, A, B), # No A (S) are B (M)
+ (Quantifier.NO, A, C), # No A (S) are C (P)
+ )
+
+ # Test Dimaris (IAI-4)
+ # Major premise: Some P are M
+ # Minor premise: All M are S
+ # Conclusion: Some S are P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.SOME, C, B), # Some C (P) are B (M)
+ (Quantifier.ALL, B, A), # All B (M) are A (S)
+ (Quantifier.SOME, A, C), # Some A (S) are C (P)
+ )
+
+ # Test Ferison (EIO-3)
+ # Major premise: No M are P
+ # Minor premise: Some M are S
+ # Conclusion: Some S are not P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.NO, B, C), # No B (M) are C (P)
+ (Quantifier.SOME, B, A), # Some B (M) are A (S)
+ (Quantifier.SOME_NOT, A, C), # Some A (S) are not C (P)
+ )
+
+ # Test Fresison (EIO-4)
+ # Major premise: No P are M
+ # Minor premise: Some M are S
+ # Conclusion: Some S are not P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.NO, C, B), # No C (P) are B (M)
+ (Quantifier.SOME, B, A), # Some B (M) are A (S)
+ (Quantifier.SOME_NOT, A, C), # Some A (S) are not C (P)
+ )
+
+ # Test Camenes (AEE-4)
+ # Major premise: All P are M
+ # Minor premise: No M are S
+ # Conclusion: No S are P
+ assert dataset._is_valid_syllogism(
+ (Quantifier.ALL, C, B), # All C (P) are B (M)
+ (Quantifier.NO, B, A), # No B (M) are A (S)
+ (Quantifier.NO, A, C), # No A (S) are C (P)
+ )
+
+ # Test invalid forms
+ assert not dataset._is_valid_syllogism(
+ (Quantifier.SOME, B, C), # Some B are C
+ (Quantifier.SOME, A, B), # Some A are B
+ (Quantifier.SOME, A, C), # Some A are C (invalid: two particular premises)
+ )
+
+ assert not dataset._is_valid_syllogism(
+ (Quantifier.NO, B, C), # No B are C
+ (Quantifier.NO, A, B), # No A are B
+ (Quantifier.NO, A, C), # No A are C (invalid: two negative premises)
+ )
+
+ # Test specific invalid case with two negative premises
+ S = Term("student", "students")
+ M = Term("human", "humans")
+ P = Term("chef", "chefs")
+ assert not dataset._is_valid_syllogism(
+ (Quantifier.NO, S, M), # No students are humans
+ (Quantifier.NO, M, P), # No humans are chefs
+ (Quantifier.NO, S, P), # No students are chefs (invalid!)
+ )
+
+ child = Term("child", "children")
+ animal = Term("animal", "animals")
+ doctor = Term("doctor", "doctors")
+
+ # Premise 1: Some children are not animals
+ # Premise 2: All animals are doctors
+ # Conclusion: Some children are not doctors
+ # We expect this NOT to be a valid syllogism
+ assert not dataset._is_valid_syllogism(
+ (Quantifier.SOME_NOT, child, animal), # Some children are not animals
+ (Quantifier.ALL, animal, doctor), # All animals are doctors
+ (Quantifier.SOME_NOT, child, doctor), # Some children are not doctors
+ )
+
+
def test_syllogism_dataset_iteration():
"""Test that iteration respects dataset size"""
config = SyllogismConfig(size=5, seed=42)
@@ -74,41 +272,3 @@ def test_syllogism_dataset_iteration():
# Test multiple iterations yield same items
assert items == list(dataset)
-
-
-def test_syllogism_custom_terms():
- """Test syllogism generation with custom terms"""
- custom_terms = [
- Term("programmer", "programmers"),
- Term("coder", "coders"),
- Term("developer", "developers"),
- ]
- config = SyllogismConfig(terms=custom_terms, size=10, seed=42)
- dataset = SyllogismDataset(config)
-
- for item in dataset:
- # Verify only custom terms are used
- text = item["question"] + str(item["metadata"])
- assert any(term.name in text or term.plural in text for term in custom_terms)
- # Verify default terms are not used
- assert "mortal" not in text
- assert "human" not in text
-
-
-def test_syllogism_validity():
- """Test logical validity rules"""
- config = SyllogismConfig(
- allow_all=True,
- allow_no=False,
- allow_some=False,
- allow_some_not=False,
- include_invalid=False, # Only generate valid syllogisms
- size=10,
- seed=42,
- )
- dataset = SyllogismDataset(config)
-
- for item in dataset:
- # All valid ALL syllogisms should have "Yes" as answer
- assert item["answer"] == "Yes"
- assert item["metadata"]["is_valid"] is True
diff --git a/tests/test_tsumego.py b/tests/test_tsumego.py
new file mode 100644
index 00000000..e979bcac
--- /dev/null
+++ b/tests/test_tsumego.py
@@ -0,0 +1,281 @@
+"""Tests for Tsumego problem generation"""
+
+import re
+
+import pytest
+
+from reasoning_gym.games.tsumego import TsumegoConfig, TsumegoDataset
+
+
+def test_config_validation():
+ # Valid configuration
+ TsumegoConfig(min_board_size=9, max_board_size=13, max_stones=10, size=100, seed=42)
+
+ # Invalid configurations
+ with pytest.raises(ValueError):
+ TsumegoConfig(min_board_size=4, max_board_size=13, max_stones=10) # min_board_size too low
+ with pytest.raises(ValueError):
+ TsumegoConfig(min_board_size=9, max_board_size=20, max_stones=10) # max_board_size too high
+ with pytest.raises(ValueError):
+ TsumegoConfig(min_board_size=13, max_board_size=9, max_stones=10) # min_board_size > max_board_size
+ with pytest.raises(ValueError):
+ TsumegoConfig(min_board_size=9, max_board_size=13, max_stones=2) # max_stones too low
+
+
+def test_dataset_item_properties():
+ config = TsumegoConfig(min_board_size=9, max_board_size=9, max_stones=15, size=100, seed=42)
+ dataset = TsumegoDataset(config)
+ item = dataset[0]
+ # Check that item has the required keys
+ for key in ["question", "answer", "metadata"]:
+ assert key in item
+
+ metadata = item["metadata"]
+ for key in ["difficulty", "board", "solution"]:
+ assert key in metadata
+
+ board = metadata["board"]
+ # Board size should be equal to the fixed min_board_size for this test
+ assert len(board) == config.min_board_size
+ assert all(len(row) == config.min_board_size for row in board)
+ # Check stone count does not exceed max_stones + 7 (to account for extra fill in capture formation)
+ stone_count = sum(cell in "XO" for row in board for cell in row)
+ assert stone_count <= config.max_stones + 7
+
+
+def test_deterministic_generation():
+ config = TsumegoConfig(min_board_size=9, max_board_size=9, max_stones=10, seed=42)
+ dataset1 = TsumegoDataset(config)
+ dataset2 = TsumegoDataset(config)
+ for i in range(3):
+ item1 = dataset1[i]
+ item2 = dataset2[i]
+ assert item1["metadata"]["board"] == item2["metadata"]["board"]
+ assert item1["answer"] == item2["answer"]
+
+
+def test_liberties_and_move():
+ # Use a small board for simplicity
+ config = TsumegoConfig(min_board_size=5, max_board_size=5, max_stones=10, size=10)
+ dataset = TsumegoDataset(config)
+
+ # Part 1: Liberty counting test
+ board_liberties = [
+ [".", "O", ".", ".", "."],
+ ["O", "X", "O", ".", "."],
+ [".", "O", ".", ".", "."],
+ [".", ".", ".", ".", "."],
+ [".", ".", ".", ".", "."],
+ ]
+ liberties = dataset._get_liberties(board_liberties, 1, 1)
+ assert len(liberties) == 0
+ liberties_edge = dataset._get_liberties(board_liberties, 0, 1)
+ assert len(liberties_edge) == 2
+
+ # Part 2: Test capturing move
+ # Construct a board where an enemy stone at (2,2) is surrounded on three sides,
+ # so that placing an "X" at (2,3) will remove its last liberty and capture it.
+ board_capture = [["." for _ in range(5)] for _ in range(5)]
+ board_capture[1][2] = "X"
+ board_capture[2][1] = "X"
+ board_capture[3][2] = "X"
+ board_capture[2][2] = "O"
+ # Now, (2,2) (enemy) has only one liberty at (2,3).
+ # Placing "X" at (2,3) should capture the enemy stone.
+ assert dataset._is_valid_move(board_capture, 2, 3, "X")
+ dataset._make_move(board_capture, 2, 3, "X")
+ # After move, captured_stones should be [(2,2)] and ko point set to (2,2).
+ assert not dataset._is_valid_move(board_capture, 2, 2, "O"), "Ko move should be invalid"
+
+ # Part 3: Test suicide move (without capture)
+ board_move = [
+ [".", "O", ".", ".", "."],
+ ["O", ".", "O", ".", "."],
+ [".", "O", ".", ".", "."],
+ [".", ".", ".", ".", "."],
+ [".", ".", ".", ".", "."],
+ ]
+ # Placing "X" at (1,1) would be suicide as all adjacent positions are occupied by "O".
+ assert not dataset._is_valid_move(board_move, 1, 1, "X")
+
+
+def convert_solution(sol, board_size):
+ # sol is expected to be a string like 'E5'
+ letter = sol[0].upper()
+ number = int(sol[1:])
+ return (board_size - number, ord(letter) - ord("A"))
+
+
+def test_score_answer():
+ config = TsumegoConfig(min_board_size=9, max_board_size=9, max_stones=10, size=5)
+ dataset = TsumegoDataset(config)
+
+ # prepare dummy with letter+number format solution
+ entry = dataset[0].copy()
+ entry["metadata"]["solution"] = "E5"
+
+ # Patch score_answer to convert metadata solution if needed
+ original_score_answer = dataset.score_answer
+
+ def patched_score_answer(answer, entry):
+ board_size = len(entry["metadata"]["board"])
+ sol = entry["metadata"]["solution"]
+ if isinstance(sol, str):
+ entry["metadata"]["solution"] = convert_solution(sol, board_size)
+ return original_score_answer(answer, entry)
+
+ dataset.score_answer = patched_score_answer
+
+ # Correct letter-number answer (E corresponds to board coordinate (4,4) for a 9x9 board)
+ assert dataset.score_answer("E5", entry) == 1.0
+
+ # Valid but incorrect letter-number move (convert_solution maps D4 to (5,3) on a 9x9 board)
+ assert dataset.score_answer("D4", entry) == 0.05
+
+ # Invalid format
+ assert dataset.score_answer("invalid", entry) == 0.01
+
+ # Empty answer
+ assert dataset.score_answer("", entry) == 0.01
+
+ # None answer
+ assert dataset.score_answer(None, entry) == 0.0
+
+ # Out-of-bound letter-number move: 'J' corresponds to 10 which is greater than board size = 9
+ assert dataset.score_answer("J9", entry) == 0.01
+
+ # test optimal score for answers, patching each entry
+ for x in dataset:
+ board_size = len(x["metadata"]["board"])
+ sol = x["metadata"]["solution"]
+ if isinstance(sol, str):
+ x["metadata"]["solution"] = convert_solution(sol, board_size)
+ assert len(x["metadata"]["board"]) == x["metadata"]["difficulty"]["board_size"]
+ assert dataset.score_answer(x["answer"], entry=x) == 1.0
+
+
+# Additional tests for game logic edge cases
+
+
+def test_get_group():
+ config = TsumegoConfig(min_board_size=5, max_board_size=5, max_stones=10, size=1, seed=42)
+ dataset = TsumegoDataset(config)
+ board = [
+ ["X", "X", "."],
+ [".", "X", "O"],
+ [".", ".", "O"],
+ ]
+ group_X = dataset._get_group(board, 0, 0)
+ expected_group_X = {(0, 0), (0, 1), (1, 1)}
+ assert group_X == expected_group_X
+
+ group_O = dataset._get_group(board, 1, 2)
+ expected_group_O = {(1, 2), (2, 2)}
+ assert group_O == expected_group_O
+
+
+def test_count_liberties():
+ config = TsumegoConfig(min_board_size=5, max_board_size=5, max_stones=10, size=1, seed=42)
+ dataset = TsumegoDataset(config)
+ board = [
+ ["X", "X", "."],
+ [".", "X", "O"],
+ [".", ".", "O"],
+ ]
+ group_X = {(0, 0), (0, 1), (1, 1)}
+ liberties_X = dataset._count_liberties(board, group_X)
+ # For (0,0): neighbor (1,0); (0,1): neighbor (0,2); (1,1): neighbors (1,0) and (2,1)
+ # Combined unique liberties: {(1,0), (0,2), (2,1)} so count should be 3
+ assert liberties_X == 3
+
+
+def test_out_of_bounds_move():
+ config = TsumegoConfig(min_board_size=5, max_board_size=5, max_stones=10, size=1, seed=42)
+ dataset = TsumegoDataset(config)
+ board = [["." for _ in range(5)] for _ in range(5)]
+ # Test moves that are out of bounds
+ assert not dataset._is_valid_move(board, -1, 0, "X")
+ assert not dataset._is_valid_move(board, 0, -1, "X")
+ assert not dataset._is_valid_move(board, 5, 0, "X")
+ assert not dataset._is_valid_move(board, 0, 5, "X")
+
+
+def test_move_on_occupied_intersection():
+ config = TsumegoConfig(min_board_size=5, max_board_size=5, max_stones=10, size=1, seed=42)
+ dataset = TsumegoDataset(config)
+ board = [["." for _ in range(5)] for _ in range(5)]
+ board[1][1] = "X"
+ # Attempting to play on an occupied spot should be invalid
+ assert not dataset._is_valid_move(board, 1, 1, "O")
+ assert not dataset._is_valid_move(board, 1, 1, "X")
+
+
+def test_valid_non_capturing_move():
+ config = TsumegoConfig(min_board_size=5, max_board_size=5, max_stones=10, size=1, seed=42)
+ dataset = TsumegoDataset(config)
+ board = [["." for _ in range(5)] for _ in range(5)]
+ # A move on an empty board that doesn't result in capture or suicide should be valid
+ assert dataset._is_valid_move(board, 0, 0, "X")
+ move_result = dataset._make_move(board, 0, 0, "X")
+ assert move_result
+ assert board[0][0] == "X"
+
+
+def test_multiple_capture():
+ # Set up a board where a move will capture multiple opponent stones,
+ # which should not trigger the ko rule (ko point remains None)
+ config = TsumegoConfig(min_board_size=5, max_board_size=5, max_stones=10, size=1, seed=42)
+ dataset = TsumegoDataset(config)
+ board = [
+ [".", ".", ".", ".", "."],
+ [".", "X", "X", "X", "."],
+ ["X", "O", "O", ".", "."],
+ [".", "X", "X", "X", "."],
+ [".", ".", ".", ".", "."],
+ ]
+ # Move at (2,3) with 'X' should capture the opponent stones at (2,1) and (2,2)
+ assert dataset._is_valid_move(board, 2, 3, "X")
+ move_result = dataset._make_move(board, 2, 3, "X")
+ assert move_result, "Move should be successfully made"
+ assert board[2][1] == ".", "Stone at (2,1) should be captured"
+ assert board[2][2] == ".", "Stone at (2,2) should be captured"
+ assert dataset._ko_point is None, "Ko point should not be set for multiple captures"
+
+
+def test_would_capture():
+ config = TsumegoConfig(min_board_size=5, max_board_size=5, max_stones=10, size=1, seed=42)
+ dataset = TsumegoDataset(config)
+ # Create a scenario similar to the one in test_liberties_and_move for capturing
+ board_capture = [["." for _ in range(5)] for _ in range(5)]
+ board_capture[1][2] = "X"
+ board_capture[2][1] = "X"
+ board_capture[3][2] = "X"
+ board_capture[2][2] = "O"
+ # Placing 'X' at (2,3) should capture the stone at (2,2)
+ assert dataset._would_capture(board_capture, 2, 3, "X")
+ # In a scenario with no capture, the move should not be considered capturing
+ board_no_capture = [["." for _ in range(5)] for _ in range(5)]
+ board_no_capture[2][2] = "O"
+ assert not dataset._would_capture(board_no_capture, 0, 0, "X")
+
+
+def test_capture_verification():
+ """Verifies that the solution move in a generated puzzle captures at least one opponent stone."""
+ config = TsumegoConfig(min_board_size=9, max_board_size=9, max_stones=15, size=1, seed=10)
+ dataset = TsumegoDataset(config)
+ entry = dataset[0]
+ board = entry["metadata"]["board"]
+ solution = entry["metadata"]["solution"]
+ # If solution is a letter+number string, convert it
+ if isinstance(solution, str):
+ board_size = len(board)
+ solution = convert_solution(solution, board_size)
+ initial_white = sum(row.count("O") for row in board)
+
+ # Make a deep copy of the board to simulate the move
+ board_after = [row[:] for row in board]
+ move_success = dataset._make_move(board_after, solution[0], solution[1], "X")
+ assert move_success, "The solution move should be legal."
+
+ final_white = sum(row.count("O") for row in board_after)
+ assert final_white < initial_white, "The solution move should capture at least one opponent stone."