diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f2bf0ecd..22878294 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,7 +29,7 @@ Thank you for your interest in contributing to Reasoning Gym! This document prov When creating new datasets, please follow these guidelines: -1. **Focus on Complex Problems**: +1. **Focus on Complex Problems**: - Prioritize problems where guessing has a low probability of success (e.g., number multiplication) - Avoid tasks with small answer sets (true/false, multiple-choice) as they create noisy rewards for RL @@ -38,13 +38,12 @@ When creating new datasets, please follow these guidelines: - Derive your dataset class from `ProceduralDataset` (see [dataset.py](https://github.com/open-thought/reasoning-gym/blob/main/reasoning_gym/dataset.py)) - Include comprehensive unit tests - Return dictionary items with keys: `"question"`, `"answer"`, and `"metadata"` - - Use `None` for `"answer"` when multiple valid answers exist - - For complex datasets, implement the `score_answer()` method (return value range: [0, 1]) + - For datasets with multiple correct answers, override the `score_answer()` method (return value range: [0, 1]) 3. **Getting Started**: - - Review example implementations: - - [chain_sum.py](reasoning_gym/arithmetic/chain_sum.py) - - [test_chain_sum.py](https://github.com/open-thought/reasoning-gym/blob/main/tests/test_chain_sum.py) + - Review an example implementation: + - Configuration & dataset class: [chain_sum.py](reasoning_gym/arithmetic/chain_sum.py) + - Unit tests: [test_chain_sum.py](https://github.com/open-thought/reasoning-gym/blob/main/tests/test_chain_sum.py) - Write clear question prompts that an average human can understand and answer correctly ## Pull Request Process @@ -76,5 +75,3 @@ When creating new datasets, please follow these guidelines: ## Need Help? Join our community discussion in the `#reasoning-gym` channel on the [GPU-Mode Discord server](https://discord.gg/gpumode). 
- - diff --git a/GALLERY.md b/GALLERY.md index a712c1d6..9defbbaf 100644 --- a/GALLERY.md +++ b/GALLERY.md @@ -2502,7 +2502,7 @@ Metadata: {'task_type': 'datetime_tz', 'start_time': datetime.datetime(2964, 6, Example 2: Question: A video call started at 09:44 and ended at 12:22. How long was the call? Answer in HH:MM. Answer: 02:38 -Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 7, 9, 44), 'end_time': datetime.datetime(2025, 2, 7, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'} +Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 8, 9, 44), 'end_time': datetime.datetime(2025, 2, 8, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'} Example 3: Question: Calculate the time difference between Sat Dec 22 2677 and Thu Mar 21 2678. Express the result in D days. @@ -2576,14 +2576,14 @@ Metadata: {'num_disks': 6, 'num_pegs': 3, 'start_peg': 1, 'target_peg': 2, 'auxi ```` ### tsumego -Generates (one-move) Tsumego problems with configurable parameters +Generates Tsumego problems with configurable parameters Default configuration: ```python min_board_size = 9 max_board_size = 13 max_stones = 15 -size = 10 +size = 100 seed = 42 ``` @@ -2608,11 +2608,8 @@ O - White Specify your move in coordinates (e.g. 'C4' for column C, row 4) Answer: E4 - Metadata: {'difficulty': {'board_size': 9}, 'board': [['X', '.', '.', '.', 'X', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', 'O', '.', 'O', '.', '.', 'X', '.', '.'], ['.', '.', '.', 'X', '.', '.', '.', '.', 'O'], ['O', '.', 'X', 'O', 'X', '.', '.', '.', '.'], ['.', 'X', 'O', 'O', '.', 'O', '.', '.', '.'], ['.', '.', 'X', 'O', 'X', '.', '.', '.', '.'], ['.', '.', '.', 'X', '.', '.', '.', '.', '.'], ['.', 'O', '.', 'O', '.', '.', 'X', '.', '.']], 'solution': 'E4'} --------------------------------------------------- - Example 2: Question: Here's a Go challenge. Playing as Black, how can you capture as many white stones as possible? 
@@ -2632,11 +2629,8 @@ O - White Specify your move in coordinates (e.g. 'C4' for column C, row 4) Answer: B7 - Metadata: {'difficulty': {'board_size': 9}, 'board': [['.', '.', 'O', '.', '.', '.', '.', '.', '.'], ['.', 'X', 'O', '.', '.', '.', '.', '.', '.'], ['X', '.', 'X', '.', '.', '.', '.', '.', '.'], ['O', 'O', 'O', 'X', '.', '.', '.', '.', '.'], ['X', 'O', 'O', '.', '.', '.', '.', '.', '.'], ['.', 'X', '.', '.', '.', '.', '.', '.', 'O'], ['.', 'X', '.', '.', '.', '.', 'X', '.', '.'], ['O', '.', 'O', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', 'O', '.', '.', '.', '.']], 'solution': 'B7'} --------------------------------------------------- - Example 3: Question: Tsumego time. Black to play and capture some stones. Find the key move. @@ -2660,7 +2654,6 @@ O - White Specify your move in coordinates (e.g. 'C4' for column C, row 4) Answer: D4 - Metadata: {'difficulty': {'board_size': 12}, 'board': [['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', 'X', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['X', '.', '.', '.', '.', 'X', '.', '.', '.', 'X', '.', '.'], ['.', 'X', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', 'O', 'X', 'X', '.', '.', '.', '.', '.', '.', '.', 'O'], ['.', 'X', 'O', 'O', 'X', '.', '.', '.', '.', '.', '.', '.'], ['.', 'O', 'O', '.', '.', '.', '.', '.', 'O', '.', '.', 'O'], ['X', '.', 'X', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.'], ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', 'X', '.']], 'solution': 'D4'} ```` diff --git a/README.md b/README.md index f06de185..036bfe78 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,28 @@ # Reasoning Gym -We are building a python library of procedural dataset generators and algorithmically verifiable reasoning environments for training Reasoning Models with reinforcement learning 
(RL). +We are building a python library of procedural dataset generators and algorithmically verifiable reasoning environments for training reasoning models with reinforcement learning (RL). The goal is to generate virtually infinite data with adjustable complexity. Algorithmic verification makes it possible to train on tasks like Rubik's cube or [Countdown](<https://en.wikipedia.org/wiki/Countdown_(game_show)#Numbers_Round>) which have many correct solutions. -## Set up for development +## Dataset Gallery -1. Clone the project +In [GALLERY.md](https://github.com/open-thought/reasoning-gym/blob/main/GALLERY.md) you will find example outputs of all datasets available in reasoning-gym. -``` -git clone https://github.com/open-thought/reasoning-gym.git -``` +## Installation -2. Create a virtual environment (here we use conda) +The `reasoning-gym` package requires Python >= 3.11. -``` -conda create --name reasoning_gym python=3.11 -y -conda activate reasoning_gym -``` - -3. Link project and install dependencies - -``` -pip install -e . -``` - -4. Install development dependencies - -``` -pip install -r requirements-dev.txt -``` - -> NOTE: To consume the APIs in reasoning_gym, just install from pip using the following +Install via pip: ``` pip install reasoning-gym ``` +For development setup, see [CONTRIBUTING.md](CONTRIBUTING.md#developer-setup). + + ## How to instantiate a task dataset? Example: @@ -64,88 +48,10 @@ metadata: {'animals': {'sheep': 2, 'dog': 2}, 'total_legs': 16} ... ``` -See the [Dataset Gallery](https://github.com/open-thought/reasoning-gym/blob/main/GALLERY.md) for a complete list of available datasets with examples. +## Contributing -## Task Overview +Please see [CONTRIBUTING.md](CONTRIBUTING.md). -### Algebra Tasks +If you have ideas for dataset generators please create an issue here or contact us in the `#reasoning-gym` channel of the [GPU-Mode discord server](https://discord.gg/gpumode). -- `SimpleEquationsDataset`: Generate linear equations with one variable to solve (e.g. 
"3\*x + 2 = 14") -- `PolynomialEquationsDataset`: Generate polynomial equations with one variable to solve (e.g. "-6*h\*\*4 + 4*h\**2 - 5*h = 0") -- `PolynomialMultiplicationDataset`: Generate polynomial multiplicatons (e.g. "(8x^3 + x + 2)\*(y - 3)") - -### Arithmetic Tasks - -- `BasicArithmeticDataset`: Generate arithmetic expressions with configurable complexity and operators (+, -, \*, /) -- `CalendarArithmeticDatset`: Generate arithmetic problems around calendar navigation logic -- `ChainSum`: Generate addition/subtraction chains with configurable length and digit counts -- `FractionSimplificationDataset`: Generate fraction simplification tasks with configurable complexity -- `GCDDataset`: Generate Greatest Common Divisor problems with configurable number of integers -- `LCMDataset`: Generate Least Common Multiple problems with configurable number of integers -- `LegCountingDataset`: Generate animal leg counting word problems with various animals -- `PrimeFactorizationDataset`: Generate prime factorization tasks with configurable number ranges -- `TimeIntervalsDataset`: Generate time interval calculation tasks with various formats (time, date, datetime) and complexities - -### Algorithmic Tasks - -- `BaseConversionDataset`: Convert numbers between different bases (binary, hex, etc.) -- `CaesarCipherDataset`: Encrypt/decrypt text using Caesar cipher with configurable rotation -- `LetterCountingDataset`: Count letter occurrences in text spans -- `NumberFilteringDataset`: Filter numbers based on comparison with threshold -- `NumberSortingDataset`: Sort lists of numbers in ascending or descending order -- `WordSortingDataset`: Sort words in ascending or descending order using ASCII/Unicode ordering -- `LetterJumbleDataset`: Unscramble words that have had their letters randomly jumbled -- `SentenceReorderingDataset`: Reorder sentence after words in it have been randomly shuffled -- `SpellBackwardDataset`: Spell individual words backward (e.g. 
"sun" -> "nus") -- `WordSequenceReversalDataset`: Reverse word order in text spans -- `WordLadderDataset`: Generate word ladder puzzles where one word is transformed into another by changing one letter at a time -- `GroupAnagramsDataset`: Group anagrams together in a list of words -- `IsomorphicStrings`: Check if two strings are isomorphic (have the same character mapping) - -### Code Tasks - -- `BFDataset`: Generates BF programs of various difficult, from simple string printing to loops and conditional logic - -### Cognition Tasks - -- `NumberSequenceDataset`: Generate number sequences with discoverable patterns -- `ColorCubeRotationDataset`: Generate 3D spatial reasoning tasks with colored cube rotations and orientation tracking -- `RubiksCubeDataset`: Generate Rubik's Cube configurations and check correct solutions -- `FigletFontDataset`: Generate random words in different "Figlet" fonts for reasoning about the structure of letters - -### Logic Tasks - -- `PropositionalLogicDataset`: Generate propositional logic reasoning problems -- `SyllogismDataset`: Generates a [syllogism](https://en.wikipedia.org/wiki/Syllogism) reasoning dataset -- `AliceInWonderlandDataset`: Generates [AIW](https://openreview.net/forum?id=Mkl7dzjYiW) (Alice In Wonderland) problems with a few variations -- `ZebraDataset`: Generates [Zebra Puzzles](https://en.wikipedia.org/wiki/Zebra_Puzzle) of varying difficulty. -- `SelfReferenceDataset`: Generates self-referencing logic puzzles. 
- -### Graph Tasks - -- `FamilyRelationshipsDataset`: Generate family relationship reasoning tasks with family trees -- `QuantumLockDataset`: Generates puzzles which involve stateful arithmetic and a correct sequence of operations -- `LargestIslandDataset`: Generate a grid with islands and find the largest one -- `CourseScheduleDataset`: Generate a course schedule with prerequisites and find whether you can complete all courses - -### Game Tasks - -- `SudokuDataset`: Generate 9x9 Sudoku puzzles with configurable number of empty cells -- `SokobanDataset`: Generate [Sokoban](https://en.wikipedia.org/wiki/Sokoban) puzzles with configurable size and detail. -- `MiniSudokuDataset`: Generate 4x4 Mini Sudoku puzzles with configurable difficulty -- `MazeDataset`: Generate a maze with a start and a goal -- `CountdownDataset`: Generate number game tasks where numbers and operators must be combined to reach a target value -- `NQueensDataset`: Generate N-Queens puzzles with configurable board size and number of starting queens -- `TsumegoDataset`: Generate Tsumego capture puzzles with variable board sizes and stone placements - -## Future Generator Ideas - -- More complex math tasks (algebra, geometry) -- Algorithmic tasks (counting, sorting, re-ordering) -- Logic riddles -- Logic inductive programming tasks -- ARC-AGI synthetic riddles - -## Call for Contributions - -If you have ideas for additional procedural dataset generators please create an issue here or contact us in the `#reasoning-gym` channel of the [GPU-Mode discord server](https://discord.gg/gpumode). 
+[![](https://dcbadge.limes.pink/api/server/gpumode?style=flat)](https://discord.gg/gpumode) diff --git a/reasoning_gym/games/sokoban.py b/reasoning_gym/games/sokoban.py index f96d87ea..124aaf48 100644 --- a/reasoning_gym/games/sokoban.py +++ b/reasoning_gym/games/sokoban.py @@ -11,14 +11,14 @@ from ..factory import ProceduralDataset, register_dataset class SokobanConfig: """Configuration for sokoban puzzle generation""" - seed: Optional[int] = None - size: int = 500 min_w: int = 6 # Minimum width of the puzzle. min_h: int = 6 # Minimum height of the puzzle. max_w: int = 10 # Maximum width of the puzzle. max_h: int = 10 # Maximum height of the puzzle. min_boxes: int = 6 # Minimum number of boxes. max_boxes: int = 10 # Maximum number of boxes. + seed: Optional[int] = None + size: int = 500 def validate(self): """Validate configuration parameters""" diff --git a/reasoning_gym/games/tower_of_hanoi.py b/reasoning_gym/games/tower_of_hanoi.py index a9e7f458..7e33c236 100644 --- a/reasoning_gym/games/tower_of_hanoi.py +++ b/reasoning_gym/games/tower_of_hanoi.py @@ -27,7 +27,7 @@ class HanoiConfig: max_disks: int = 7 min_pegs: int = 3 max_pegs: int = 4 - size: int = 50 + size: int = 500 seed: Optional[int] = None visualize: bool = False # New parameter diff --git a/reasoning_gym/games/tsumego.py b/reasoning_gym/games/tsumego.py index be1e4fd6..f979a1e4 100644 --- a/reasoning_gym/games/tsumego.py +++ b/reasoning_gym/games/tsumego.py @@ -34,7 +34,7 @@ class TsumegoConfig: min_board_size: int = 9 max_board_size: int = 13 max_stones: int = 15 - size: int = 100 + size: int = 500 seed: Optional[int] = None def __post_init__(self):