diff --git a/README.md b/README.md index 622a7886..413d7809 100644 --- a/README.md +++ b/README.md @@ -4,46 +4,102 @@ We are building a python library of procedural dataset generators and algorithmi The goal is to generate virtually infinite data with adjustable complexity. -## Quick Start +### Available Generators + +#### Basic Arithmetic +Generates arithmetic problems with configurable complexity: ```python -from reasoning_gym.arithmetic import ChainSum, ChainSumConfig +from reasoning_gym.arithmetic import ArithmeticDataset, ArithmeticDatasetConfig -# configure a simple arithmetic task generator -config = ChainSumConfig( - min_terms=2, - max_terms=6, - min_digits=1, - max_digits=4, - allow_negation=False, # Only positive numbers - size=5, # virtual size of dataset - seed=42 # deterministic results +config = ArithmeticDatasetConfig( + min_terms=2, # Minimum number of terms in expression + max_terms=4, # Maximum number of terms + min_digits=1, # Minimum digits per number + max_digits=2, # Maximum digits per number + allow_parentheses=True, # Include nested expressions + size=5, # Number of problems to generate + seed=42 # For reproducibility ) -# create the dataset -dataset = ChainSum(config) - -# print some examples +dataset = ArithmeticDataset(config) for item in dataset: print(item) ``` Example output: ``` -{'question': '4 + 3 =', 'answer': '7', 'metadata': {'num_terms': 2, 'num_digits': 1, 'expression': '4 + 3'}} -{'question': '812 + 880 =', 'answer': '1692', 'metadata': {'num_terms': 2, 'num_digits': 3, 'expression': '812 + 880'}} -{'question': '2 + 6 + 3 + 4 + 0 =', 'answer': '15', 'metadata': {'num_terms': 5, 'num_digits': 1, 'expression': '2 + 6 + 3 + 4 + 0'}} -{'question': '8995 - 5221 + 2341 + 5967 =', 'answer': '12082', 'metadata': {'num_terms': 4, 'num_digits': 4, 'expression': '8995 - 5221 + 2341 + 5967'}} -{'question': '1654 + 4744 =', 'answer': '6398', 'metadata': {'num_terms': 2, 'num_digits': 4, 'expression': '1654 + 4744'}} +{'question': '-1 + -5 
* 8 + -8 =', 'answer': '-49', 'metadata': {'num_terms': 4, 'num_digits': 1, 'expression': '-1 + -5 * 8 + -8'}} +{'question': '19 - 17 =', 'answer': '2', 'metadata': {'num_terms': 2, 'num_digits': 2, 'expression': '19 - 17'}} +{'question': '3 + -6 * -9 =', 'answer': '57', 'metadata': {'num_terms': 3, 'num_digits': 1, 'expression': '3 + -6 * -9'}} +{'question': '-22 - -94 + -97 =', 'answer': '-25', 'metadata': {'num_terms': 3, 'num_digits': 2, 'expression': '-22 - -94 + -97'}} +{'question': '51 * 63 =', 'answer': '3213', 'metadata': {'num_terms': 2, 'num_digits': 2, 'expression': '51 * 63'}} ``` +#### Chain Sum +Generates addition/subtraction problems with configurable complexity: +```python +from reasoning_gym.arithmetic import ChainSum, ChainSumConfig -### Generator / Environment Ideas +config = ChainSumConfig( + min_terms=2, # Minimum numbers to add/subtract + max_terms=6, # Maximum numbers + min_digits=1, # Minimum digits per number + max_digits=4, # Maximum digits per number + allow_negation=True, # Allow negative numbers + size=5, # Number of problems + seed=42 # For reproducibility +) -- math tasks -- algorithmic tasks (counting, sorting, re-ordering, ..) 
-- logic riddles -- logic inductive programming tasks +dataset = ChainSum(config) +for item in dataset: + print(item) +``` + +Example output: +``` +{'question': '-1 + -2 =', 'answer': '-3', 'metadata': {'num_terms': 2, 'num_digits': 1, 'expression': '-1 + -2'}} +{'question': '426 + 562 =', 'answer': '988', 'metadata': {'num_terms': 2, 'num_digits': 3, 'expression': '426 + 562'}} +{'question': '-4 + 3 + -2 + 0 + -9 =', 'answer': '-12', 'metadata': {'num_terms': 5, 'num_digits': 1, 'expression': '-4 + 3 + -2 + 0 + -9'}} +{'question': '5992 - -1556 + -7316 + -65 =', 'answer': '167', 'metadata': {'num_terms': 4, 'num_digits': 4, 'expression': '5992 - -1556 + -7316 + -65'}} +{'question': '-8690 + 9288 =', 'answer': '598', 'metadata': {'num_terms': 2, 'num_digits': 4, 'expression': '-8690 + 9288'}} +``` + +#### Sequence Completion +Generates number sequence completion tasks with dynamic pattern generation: +```python +from reasoning_gym.cognition import SequenceDataset, SequenceConfig + +config = SequenceConfig( + min_terms=4, # Minimum visible terms + max_terms=8, # Maximum visible terms + min_value=-100, # Minimum allowed number + max_value=100, # Maximum allowed number + max_complexity=3, # Maximum operations to combine + size=5, # Number of sequences + seed=42 # For reproducibility +) + +dataset = SequenceDataset(config) +for item in dataset: + print(item) +``` + +Example output: +``` +{'question': '3, 6, 12, 24, 48, 96, 192, 384, ?', 'answer': '768', 'metadata': {'rule': 'double', 'complexity': 3, 'sequence': [3, 6, 12, 24, 48, 96, 192, 384, 768]}} +{'question': '8, 14, 20, 26, 32, 38, 44, ?', 'answer': '50', 'metadata': {'rule': 'add 6', 'complexity': 1, 'sequence': [8, 14, 20, 26, 32, 38, 44, 50]}} +{'question': '8, 4, 2, 1, 0, 0, 0, ?', 'answer': '0', 'metadata': {'rule': 'halve', 'complexity': 2, 'sequence': [8, 4, 2, 1, 0, 0, 0, 0]}} +{'question': '-6, 15, -6, 15, ?', 'answer': '-6', 'metadata': {'rule': 'multiply by -1 then add 9', 'complexity': 2, 'sequence': 
[-6, 15, -6, 15, -6]}} +{'question': '10, 2, -6, -14, -22, -30, ?', 'answer': '-38', 'metadata': {'rule': 'add -8', 'complexity': 1, 'sequence': [10, 2, -6, -14, -22, -30, -38]}} +``` + +### Future Generator Ideas + +- More complex math tasks (algebra, geometry) +- Algorithmic tasks (counting, sorting, re-ordering) +- Logic riddles +- Inductive logic programming tasks - ARC-AGI synthetic riddles