"""Number sequence completion tasks built from small, composable arithmetic rules."""

from dataclasses import dataclass
from enum import Enum
from random import Random
from typing import List, Optional


class Operation(Enum):
    """Basic mathematical operations that can be composed"""

    ADD = "+"
    MULTIPLY = "*"
    SQUARE = "^2"
    DOUBLE = "*2"
    HALF = "/2"
    PREV_PLUS = "prev+"  # Add previous number
    ALTERNATE = "alt"  # Alternate between operations
    COMPOSE = "compose"  # Compose two operations


# Operations that PatternRule.apply() and PatternRule.to_string() handle.
# ALTERNATE and COMPOSE have no branch in either method, so a generated rule
# containing them would silently do nothing for that step and would be missing
# from the rule description; the generator therefore never picks them.
_IMPLEMENTED_OPERATIONS = (
    Operation.ADD,
    Operation.MULTIPLY,
    Operation.SQUARE,
    Operation.DOUBLE,
    Operation.HALF,
    Operation.PREV_PLUS,
)

# Operations whose behavior depends on their parameter.
_PARAMETERIZED_OPERATIONS = (Operation.ADD, Operation.MULTIPLY)

# Candidate parameters for ADD / MULTIPLY; zero is excluded because
# "add 0" and "multiply by 0" make trivial rules.
_NONZERO_PARAMETERS = tuple(value for value in range(-10, 11) if value != 0)

# Largest absolute term value a generated sequence may contain.
_MAX_ABS_TERM = 1000


@dataclass
class SequenceConfig:
    """Configuration for sequence generation"""

    min_terms: int = 4  # Minimum visible terms
    max_terms: int = 8  # Maximum visible terms
    min_value: int = -100  # Minimum allowed number
    max_value: int = 100  # Maximum allowed number
    max_complexity: int = 3  # Maximum number of operations to combine
    seed: Optional[int] = None
    size: int = 500  # Virtual dataset size

    def validate(self):
        """Validate configuration parameters.

        Raises:
            AssertionError: if any parameter is out of range. Assertions are
                kept (rather than ValueError) because existing callers catch
                AssertionError.
        """
        assert self.min_terms >= 4, "need at least 4 terms to establish pattern"
        assert self.max_terms >= self.min_terms, "max_terms must be >= min_terms"
        assert self.max_value > self.min_value, "max_value must be > min_value"
        assert self.max_complexity >= 1, "max_complexity must be >= 1"


class PatternRule:
    """Represents a composable sequence pattern rule.

    A rule is an ordered list of operations, each paired with the parameter
    at the same index in ``parameters`` (ignored by operations that take none).
    """

    def __init__(self, operations: List[Operation], parameters: List[int]):
        self.operations = operations
        self.parameters = parameters

    def apply(self, sequence: List[int], position: int) -> int:
        """Compute the term that follows ``sequence[position]``.

        Args:
            sequence: Terms generated so far.
            position: Index of the current term; the operations are applied
                to ``sequence[position]`` in order.

        Returns:
            The next term of the sequence.

        Raises:
            IndexError: if ``position`` is not a valid index into ``sequence``.
        """
        result = sequence[position]  # Start with current number

        for op, param in zip(self.operations, self.parameters):
            if op == Operation.ADD:
                result += param
            elif op == Operation.MULTIPLY:
                result *= param
            elif op == Operation.SQUARE:
                result = result * result
            elif op == Operation.DOUBLE:
                result *= 2
            elif op == Operation.HALF:
                result //= 2  # Integer (floor) division keeps terms integral
            elif op == Operation.PREV_PLUS:
                # The very first term has no predecessor, so nothing is added.
                if position > 0:
                    result += sequence[position - 1]

        return result

    def to_string(self) -> str:
        """Convert rule to a human-readable string such as ``"double then add 3"``."""
        parts = []
        for op, param in zip(self.operations, self.parameters):
            if op == Operation.ADD:
                parts.append(f"add {param}")
            elif op == Operation.MULTIPLY:
                parts.append(f"multiply by {param}")
            elif op == Operation.SQUARE:
                parts.append("square")
            elif op == Operation.DOUBLE:
                parts.append("double")
            elif op == Operation.HALF:
                parts.append("halve")
            elif op == Operation.PREV_PLUS:
                parts.append("add previous")
        return " then ".join(parts)


class PatternGenerator:
    """Generates new pattern rules with configurable complexity"""

    def __init__(self, rng: Random, complexity: int = 1):
        self.rng = rng
        self.complexity = complexity

    def generate_rule(self) -> PatternRule:
        """Generate a random rule made of 1..``complexity`` operations.

        Returns:
            A PatternRule whose operations are all implemented by
            ``PatternRule.apply``.
        """
        # randint is inclusive on both ends, so the upper bound is the
        # complexity itself; max() keeps a complexity below 1 from raising.
        num_ops = self.rng.randint(1, max(1, self.complexity))

        operations = []
        parameters = []
        for _ in range(num_ops):
            op = self.rng.choice(_IMPLEMENTED_OPERATIONS)
            operations.append(op)
            if op in _PARAMETERIZED_OPERATIONS:
                parameters.append(self.rng.choice(_NONZERO_PARAMETERS))
            else:
                parameters.append(0)  # Placeholder: operation takes no parameter

        return PatternRule(operations, parameters)

    def is_interesting(self, sequence: List[int], max_value: int = _MAX_ABS_TERM) -> bool:
        """Check whether a sequence is worth presenting as a task.

        Args:
            sequence: Candidate sequence, answer term included.
            max_value: Largest absolute value any term may have.

        Returns:
            False for an empty sequence, a sequence with a term whose absolute
            value exceeds ``max_value``, a constant sequence, or (when
            ``complexity > 1``) a plain arithmetic progression; True otherwise.
        """
        if not sequence:
            return False

        # Avoid too large numbers
        if any(abs(term) > max_value for term in sequence):
            return False

        # Avoid constant sequences
        if len(set(sequence)) == 1:
            return False

        # Avoid simple arithmetic progressions if complexity > 1
        if self.complexity > 1:
            diffs = {later - earlier for earlier, later in zip(sequence, sequence[1:])}
            if len(diffs) == 1:
                return False

        return True


class SequenceDataset:
    """Generates number sequence completion tasks with dynamic pattern generation"""

    # How many random rules to try per item before using the fallback rule.
    _MAX_ATTEMPTS = 10

    def __init__(self, config: SequenceConfig):
        self.config = config
        self.config.validate()
        self.seed = config.seed if config.seed is not None else Random().randint(0, 2**32)

    def __len__(self) -> int:
        return self.config.size

    def __iter__(self):
        """Make the dataset iterable.

        The dataset is its own iterator, so the cursor is shared: starting a
        second iteration resets the first one.
        """
        self._current_idx = 0
        return self

    def __next__(self):
        """Get next item in iteration"""
        if self._current_idx >= self.config.size:
            raise StopIteration
        item = self[self._current_idx]
        self._current_idx += 1
        return item

    @staticmethod
    def _build_sequence(rule: PatternRule, start: int, num_terms: int) -> Optional[List[int]]:
        """Extend ``[start]`` by ``num_terms`` terms, or return None if a term gets too large."""
        sequence = [start]
        for _ in range(num_terms):
            # The rule is applied to the most recent term, i.e. the last index.
            next_term = rule.apply(sequence, len(sequence) - 1)
            # Stop early: such a sequence would be rejected anyway, and rules
            # that keep squaring would otherwise build enormous integers.
            if abs(next_term) > _MAX_ABS_TERM:
                return None
            sequence.append(next_term)
        return sequence

    def __getitem__(self, idx: int) -> dict:
        """Generate the sequence task for index ``idx``.

        The item is derived only from ``self.seed + idx``, so the same index
        always yields the same task.

        Returns:
            A dict with ``"question"`` (visible terms followed by ``?``),
            ``"answer"`` (the next term as a string) and ``"metadata"``
            (rule description, complexity and the full sequence).
        """
        rng = Random(self.seed + idx)

        # Create pattern generator with random complexity
        complexity = rng.randint(1, self.config.max_complexity)
        generator = PatternGenerator(rng, complexity)

        # NOTE(review): config.min_value / config.max_value are not consulted
        # here; terms are only limited by the generator's own magnitude check.
        for _ in range(self._MAX_ATTEMPTS):
            rule = generator.generate_rule()
            num_terms = rng.randint(self.config.min_terms, self.config.max_terms)
            start = rng.randint(-10, 10)
            # num_terms visible terms plus one answer term: the start value
            # followed by num_terms generated terms.
            sequence = self._build_sequence(rule, start, num_terms)
            if sequence is not None and generator.is_interesting(sequence):
                break
        else:
            # No attempt produced an interesting sequence: fall back to
            # simple addition so an item is always returned.
            rule = PatternRule([Operation.ADD], [2])
            sequence = [i * 2 for i in range(num_terms + 1)]

        visible_terms = sequence[:-1]  # Last term is the answer

        return {
            "question": ", ".join(map(str, visible_terms)) + ", ?",
            "answer": str(sequence[-1]),
            "metadata": {
                "rule": rule.to_string(),
                "complexity": complexity,
                "sequence": sequence,
            },
        }


def sequence_dataset(
    min_terms: int = 4,
    max_terms: int = 8,
    min_value: int = -100,
    max_value: int = 100,
    max_complexity: int = 3,
    seed: Optional[int] = None,
    size: int = 500,
) -> SequenceDataset:
    """Create a SequenceDataset with the given configuration."""
    config = SequenceConfig(
        min_terms=min_terms,
        max_terms=max_terms,
        min_value=min_value,
        max_value=max_value,
        max_complexity=max_complexity,
        seed=seed,
        size=size,
    )
    return SequenceDataset(config)
"""Propositional logic task generator"""

from dataclasses import dataclass
from enum import Enum
from random import Random
from typing import Any, List, Optional, Set, Tuple


class Operator(Enum):
    """Basic logical operators"""

    AND = "∧"
    OR = "∨"
    NOT = "¬"
    IMPLIES = "→"
    IFF = "↔"


# Letters used for variable names, in order: P..Z first, then A..O.
_VARIABLE_LETTERS = "PQRSTUVWXYZABCDEFGHIJKLMNO"


def _variable_name(index: int) -> str:
    """Return the name of the ``index``-th variable: P, Q, ..., Z, A, ..., O, P1, Q1, ..."""
    cycle, offset = divmod(index, len(_VARIABLE_LETTERS))
    letter = _VARIABLE_LETTERS[offset]
    # A numeric suffix keeps names unique once the alphabet is exhausted.
    return letter if cycle == 0 else f"{letter}{cycle}"


@dataclass
class PropositionalLogicConfig:
    """Configuration for propositional logic task generation"""

    min_vars: int = 2  # Minimum number of variables
    max_vars: int = 4  # Maximum number of variables
    min_statements: int = 2  # Minimum number of given statements
    max_statements: int = 4  # Maximum number of statements
    max_complexity: int = 3  # Maximum operator depth
    seed: Optional[int] = None
    size: int = 500  # Virtual dataset size

    def validate(self):
        """Validate configuration parameters.

        Raises:
            AssertionError: if any parameter is out of range. Assertions are
                kept (rather than ValueError) because existing callers catch
                AssertionError.
        """
        assert self.min_vars > 0, "min_vars must be positive"
        assert self.max_vars >= self.min_vars, "max_vars must be >= min_vars"
        assert self.min_statements > 0, "min_statements must be positive"
        assert self.max_statements >= self.min_statements, "max_statements must be >= min_statements"
        assert self.max_complexity > 0, "max_complexity must be positive"


class Expression:
    """Represents a logical expression that can be evaluated.

    A variable is stored as ``operator=None`` with its name in ``left``.
    NOT uses only ``left``; every other operator uses ``left`` and ``right``.
    """

    def __init__(self, operator: Optional[Operator], left: Any, right: Optional[Any] = None):
        self.operator = operator
        self.left = left
        self.right = right

    def evaluate(self, assignments: dict[str, bool]) -> bool:
        """Evaluate expression with given variable assignments.

        Args:
            assignments: Truth value for every variable the expression uses.

        Raises:
            KeyError: if a variable is missing from ``assignments``.
            ValueError: if the operator is not a known ``Operator``.
        """
        if self.operator is None:
            return assignments[self.left]  # Variable
        elif self.operator == Operator.NOT:
            return not self.left.evaluate(assignments)
        elif self.operator == Operator.AND:
            return self.left.evaluate(assignments) and self.right.evaluate(assignments)
        elif self.operator == Operator.OR:
            return self.left.evaluate(assignments) or self.right.evaluate(assignments)
        elif self.operator == Operator.IMPLIES:
            return (not self.left.evaluate(assignments)) or self.right.evaluate(assignments)
        elif self.operator == Operator.IFF:
            return self.left.evaluate(assignments) == self.right.evaluate(assignments)
        raise ValueError(f"Unknown operator: {self.operator}")

    def __str__(self) -> str:
        if self.operator is None:
            return self.left
        elif self.operator == Operator.NOT:
            return f"{self.operator.value}{self.left}"
        else:
            return f"({self.left} {self.operator.value} {self.right})"


class PropositionalLogicDataset:
    """Generates propositional logic reasoning tasks"""

    def __init__(self, config: PropositionalLogicConfig):
        self.config = config
        self.config.validate()
        self.seed = config.seed if config.seed is not None else Random().randint(0, 2**32)

    def __len__(self) -> int:
        return self.config.size

    def __iter__(self):
        """Make the dataset iterable.

        The dataset is its own iterator, so the cursor is shared: starting a
        second iteration resets the first one.
        """
        self._current_idx = 0
        return self

    def __next__(self):
        if self._current_idx >= self.config.size:
            raise StopIteration
        item = self[self._current_idx]
        self._current_idx += 1
        return item

    def __getitem__(self, idx: int) -> dict[str, Any]:
        """Generate a single propositional logic task.

        The item is derived only from ``self.seed + idx``, so the same index
        always yields the same task.

        Returns:
            A dict with ``"question"`` (numbered premises plus the prompt),
            ``"answer"`` (the conclusion as a string) and ``"metadata"``
            (premise strings, variable names and conclusion complexity).
        """
        rng = Random(self.seed + idx)

        # Generate random variables
        num_vars = rng.randint(self.config.min_vars, self.config.max_vars)
        variables = [_variable_name(i) for i in range(num_vars)]

        # Generate premises
        num_statements = rng.randint(self.config.min_statements, self.config.max_statements)
        premises = self._generate_premises(rng, variables, num_statements)

        # Generate a valid conclusion
        conclusion = self._find_valid_conclusion(rng, premises, variables)

        # Format question
        numbered = "".join(f"{i}. {premise}\n" for i, premise in enumerate(premises, 1))
        question = "Given:\n" + numbered + "What can we conclude?"

        return {
            "question": question,
            "answer": str(conclusion),
            "metadata": {
                "premises": [str(p) for p in premises],
                "variables": variables,
                "complexity": self._measure_complexity(conclusion),
            },
        }

    def _generate_premises(self, rng: Random, variables: List[str], num_statements: int) -> List[Expression]:
        """Generate ``num_statements`` premises, each with a random depth up to max_complexity."""
        premises = []
        for _ in range(num_statements):
            depth = rng.randint(1, self.config.max_complexity)
            premises.append(self._generate_expression(rng, variables, depth))
        return premises

    def _generate_expression(self, rng: Random, variables: List[str], depth: int) -> Expression:
        """Generate a random logical expression; ``depth <= 1`` yields a bare variable."""
        if depth <= 1:
            return Expression(None, rng.choice(variables))

        operator = rng.choice(list(Operator))
        if operator == Operator.NOT:
            return Expression(operator, self._generate_expression(rng, variables, depth - 1))

        left = self._generate_expression(rng, variables, depth - 1)
        right = self._generate_expression(rng, variables, depth - 1)
        return Expression(operator, left, right)

    def _find_valid_conclusion(self, rng: Random, premises: List[Expression], variables: List[str]) -> Expression:
        """Find a conclusion that follows from the premises.

        Random depth-2 candidates are tried first. If none of them is
        entailed, the result falls back to a statement that is guaranteed to
        follow, so the returned answer is never wrong.
        """
        # NOTE(review): a tautological candidate, or any candidate when the
        # premises contradict each other, also counts as valid here.
        for _ in range(100):
            candidate = self._generate_expression(rng, variables, 2)
            if self._is_valid_conclusion(premises, candidate):
                return candidate

        if premises:
            # Every premise trivially follows from the premises.
            return premises[0]
        # Without premises only a tautology is a valid conclusion.
        variable = Expression(None, variables[0])
        return Expression(Operator.OR, variable, Expression(Operator.NOT, variable))

    def _is_valid_conclusion(self, premises: List[Expression], conclusion: Expression) -> bool:
        """Check if conclusion follows from premises using truth tables."""
        variables = self._collect_variables(premises + [conclusion])

        # The conclusion is invalid as soon as one assignment makes every
        # premise true and the conclusion false.
        for assignment in self._generate_assignments(variables):
            if all(p.evaluate(assignment) for p in premises) and not conclusion.evaluate(assignment):
                return False
        return True

    def _collect_variables(self, expressions: List[Expression]) -> Set[str]:
        """Collect all variables used in expressions"""
        variables = set()
        for expr in expressions:
            if expr.operator is None:
                variables.add(expr.left)
                continue
            if isinstance(expr.left, Expression):
                variables.update(self._collect_variables([expr.left]))
            if expr.right and isinstance(expr.right, Expression):
                variables.update(self._collect_variables([expr.right]))
        return variables

    def _generate_assignments(self, variables: Set[str]) -> List[dict[str, bool]]:
        """Generate all 2**n truth value assignments for the given variables."""
        ordered = sorted(variables)  # Sorted once, so the table order is deterministic
        return [
            # Bit j of the row number is the truth value of the j-th variable.
            {var: bool((row >> bit) & 1) for bit, var in enumerate(ordered)}
            for row in range(2 ** len(ordered))
        ]

    def _measure_complexity(self, expression: Expression) -> int:
        """Measure the complexity of an expression as its number of nodes."""
        if expression.operator is None:
            return 1
        elif expression.operator == Operator.NOT:
            return 1 + self._measure_complexity(expression.left)
        else:
            return 1 + self._measure_complexity(expression.left) + self._measure_complexity(expression.right)


def propositional_logic_dataset(
    min_vars: int = 2,
    max_vars: int = 4,
    min_statements: int = 2,
    max_statements: int = 4,
    max_complexity: int = 3,
    seed: Optional[int] = None,
    size: int = 500,
) -> PropositionalLogicDataset:
    """Create a PropositionalLogicDataset with the given configuration."""
    config = PropositionalLogicConfig(
        min_vars=min_vars,
        max_vars=max_vars,
        min_statements=min_statements,
        max_statements=max_statements,
        max_complexity=max_complexity,
        seed=seed,
        size=size,
    )
    return PropositionalLogicDataset(config)
PropositionalLogicDataset, -) - - -def test_propositional_logic_config_validation(): - """Test that invalid configs raise appropriate errors""" - with pytest.raises(AssertionError): - config = PropositionalLogicConfig(min_vars=0) - config.validate() - - with pytest.raises(AssertionError): - config = PropositionalLogicConfig(min_vars=4, max_vars=3) - config.validate() - - with pytest.raises(AssertionError): - config = PropositionalLogicConfig(min_statements=0) - config.validate() - - -def test_expression_evaluation(): - """Test logical expression evaluation""" - # Test simple variable - expr = Expression(None, "P") - assert expr.evaluate({"P": True}) is True - assert expr.evaluate({"P": False}) is False - - # Test NOT - expr = Expression(Operator.NOT, Expression(None, "P")) - assert expr.evaluate({"P": True}) is False - assert expr.evaluate({"P": False}) is True - - # Test AND - expr = Expression( - Operator.AND, - Expression(None, "P"), - Expression(None, "Q") - ) - assert expr.evaluate({"P": True, "Q": True}) is True - assert expr.evaluate({"P": True, "Q": False}) is False - - # Test IMPLIES - expr = Expression( - Operator.IMPLIES, - Expression(None, "P"), - Expression(None, "Q") - ) - assert expr.evaluate({"P": True, "Q": False}) is False - assert expr.evaluate({"P": True, "Q": True}) is True - assert expr.evaluate({"P": False, "Q": False}) is True - - -def test_propositional_logic_dataset_deterministic(): - """Test that dataset generates same items with same seed""" - config = PropositionalLogicConfig(seed=42, size=10) - dataset1 = PropositionalLogicDataset(config) - dataset2 = PropositionalLogicDataset(config) - - for i in range(len(dataset1)): - assert dataset1[i] == dataset2[i] - - -def test_propositional_logic_dataset_items(): - """Test basic properties of generated items""" - config = PropositionalLogicConfig( - min_vars=2, - max_vars=3, - min_statements=2, - max_statements=3, - max_complexity=2, - size=10, - seed=42 - ) - dataset = 
PropositionalLogicDataset(config) - - for i in range(len(dataset)): - item = dataset[i] - assert isinstance(item, dict) - assert "question" in item - assert "answer" in item - assert "metadata" in item - assert isinstance(item["metadata"]["premises"], list) - assert isinstance(item["metadata"]["variables"], list) - assert isinstance(item["metadata"]["complexity"], int) - - -def test_propositional_logic_dataset_iteration(): - """Test that iteration respects dataset size""" - config = PropositionalLogicConfig(size=5, seed=42) - dataset = PropositionalLogicDataset(config) - - items = list(dataset) - assert len(items) == config.size - - # Test multiple iterations yield same items - assert items == list(dataset) -""" -Logic tasks for training reasoning capabilities: -- Propositional logic -- Predicate logic -- Set theory -- Syllogisms -""" - -from .propositional_logic import PropositionalLogicConfig, PropositionalLogicDataset, propositional_logic_dataset - -__all__ = ["PropositionalLogicConfig", "PropositionalLogicDataset", "propositional_logic_dataset"]