"""Propositional logic task generator.

Builds random propositional formulas over a small set of variables, searches
for a conclusion that is entailed by the generated premises (checked by
truth-table enumeration) and formats the result as a question/answer item.

This module belongs to the logic task family (propositional logic, predicate
logic, set theory, syllogisms); only propositional logic is implemented here.
"""

from dataclasses import dataclass
from enum import Enum
from random import Random
from typing import Any, Dict, List, Optional, Set, Tuple

__all__ = [
    "Expression",
    "Operator",
    "PropositionalLogicConfig",
    "PropositionalLogicDataset",
    "propositional_logic_dataset",
]

# Names handed out to variables, in order. The first eleven (P..Z) match the
# historical chr(ord("P") + i) scheme; the alphabet then wraps to A..O instead
# of running into punctuation such as "[" and "\\".
_VARIABLE_LETTERS = "PQRSTUVWXYZABCDEFGHIJKLMNO"


class Operator(Enum):
    """Basic logical operators; each value is the symbol used when printing."""

    AND = "∧"
    OR = "∨"
    NOT = "¬"
    IMPLIES = "→"
    IFF = "↔"


@dataclass
class PropositionalLogicConfig:
    """Configuration for propositional logic task generation."""

    min_vars: int = 2  # Minimum number of variables
    max_vars: int = 4  # Maximum number of variables
    min_statements: int = 2  # Minimum number of given statements
    max_statements: int = 4  # Maximum number of statements
    max_complexity: int = 3  # Maximum operator depth
    seed: Optional[int] = None
    size: int = 500  # Virtual dataset size

    def validate(self) -> None:
        """Check the configured bounds.

        Raises:
            AssertionError: if any bound is non-positive or a max is below its
                min. The exception is raised explicitly (not via ``assert``)
                so the checks still run under ``python -O``.
        """
        checks = (
            (self.min_vars > 0, "min_vars must be positive"),
            (self.max_vars >= self.min_vars, "max_vars must be >= min_vars"),
            (self.min_statements > 0, "min_statements must be positive"),
            (
                self.max_statements >= self.min_statements,
                "max_statements must be >= min_statements",
            ),
            (self.max_complexity > 0, "max_complexity must be positive"),
        )
        for passed, message in checks:
            if not passed:
                raise AssertionError(message)


class Expression:
    """A node of a propositional formula that can be evaluated and printed.

    A node with ``operator is None`` is a variable whose name is stored in
    ``left``. A ``NOT`` node has its operand in ``left``; every other operator
    uses both ``left`` and ``right``.
    """

    def __init__(self, operator: Optional[Operator], left: Any, right: Optional[Any] = None):
        self.operator = operator
        self.left = left
        self.right = right

    def evaluate(self, assignments: Dict[str, bool]) -> bool:
        """Evaluate the expression under the given variable assignments.

        Args:
            assignments: truth value for every variable name in the expression.

        Returns:
            The truth value of the expression.

        Raises:
            KeyError: if a variable has no entry in ``assignments``.
            ValueError: if the node carries an operator this method does not know.
        """
        op = self.operator
        if op is None:
            return assignments[self.left]  # Variable lookup
        if op is Operator.NOT:
            return not self.left.evaluate(assignments)
        if op is Operator.AND:
            return self.left.evaluate(assignments) and self.right.evaluate(assignments)
        if op is Operator.OR:
            return self.left.evaluate(assignments) or self.right.evaluate(assignments)
        if op is Operator.IMPLIES:
            # Material implication: false only when left is true and right is false.
            return (not self.left.evaluate(assignments)) or self.right.evaluate(assignments)
        if op is Operator.IFF:
            return self.left.evaluate(assignments) == self.right.evaluate(assignments)
        raise ValueError(f"Unknown operator: {self.operator}")

    def __str__(self) -> str:
        if self.operator is None:
            return self.left
        if self.operator is Operator.NOT:
            return f"{self.operator.value}{self.left}"
        return f"({self.left} {self.operator.value} {self.right})"

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.operator!r}, {self.left!r}, {self.right!r})"


class PropositionalLogicDataset:
    """Generates propositional logic reasoning tasks.

    Items are generated on demand: item ``idx`` is derived from a private
    ``Random(seed + idx)``, so the same seed and index always give the same item.
    """

    def __init__(self, config: PropositionalLogicConfig):
        self.config = config
        self.config.validate()
        # Without an explicit seed, draw one so indexing stays self-consistent
        # for the lifetime of this instance.
        self.seed = config.seed if config.seed is not None else Random().randint(0, 2**32)
        # Cursor for the iterator protocol; set here so next() works even
        # before the first iter() call.
        self._current_idx = 0

    def __len__(self) -> int:
        return self.config.size

    def __iter__(self):
        # The dataset is its own iterator; each iter() call restarts from item 0.
        self._current_idx = 0
        return self

    def __next__(self):
        if self._current_idx >= self.config.size:
            raise StopIteration
        item = self[self._current_idx]
        self._current_idx += 1
        return item

    def __getitem__(self, idx: int) -> Dict[str, Any]:
        """Generate a single propositional logic task.

        Args:
            idx: item index; combined with the dataset seed to seed the RNG.

        Returns:
            A dict with ``question`` (numbered premises followed by the prompt),
            ``answer`` (the conclusion as a string) and ``metadata`` (premise
            strings, variable names, and the conclusion's complexity).
        """
        rng = Random(self.seed + idx)

        # Draw order (variables, statement count, premises, conclusion) is part
        # of the deterministic contract; do not reorder these calls.
        num_vars = rng.randint(self.config.min_vars, self.config.max_vars)
        variables = [self._variable_name(i) for i in range(num_vars)]

        num_statements = rng.randint(self.config.min_statements, self.config.max_statements)
        premises = self._generate_premises(rng, variables, num_statements)

        conclusion = self._find_valid_conclusion(rng, premises, variables)

        lines = ["Given:"]
        lines.extend(f"{i}. {premise}" for i, premise in enumerate(premises, 1))
        lines.append("What can we conclude?")
        question = "\n".join(lines)

        return {
            "question": question,
            "answer": str(conclusion),
            "metadata": {
                "premises": [str(p) for p in premises],
                "variables": variables,
                "complexity": self._measure_complexity(conclusion),
            },
        }

    @staticmethod
    def _variable_name(index: int) -> str:
        """Return the name of the ``index``-th variable (P, Q, ..., Z, A, ..., O, P1, ...)."""
        cycle, position = divmod(index, len(_VARIABLE_LETTERS))
        letter = _VARIABLE_LETTERS[position]
        return letter if cycle == 0 else f"{letter}{cycle}"

    def _generate_premises(self, rng: Random, variables: List[str], num_statements: int) -> List[Expression]:
        """Generate ``num_statements`` random premises, each with a random depth."""
        premises = []
        for _ in range(num_statements):
            depth = rng.randint(1, self.config.max_complexity)
            premises.append(self._generate_expression(rng, variables, depth))
        return premises

    def _generate_expression(self, rng: Random, variables: List[str], depth: int) -> Expression:
        """Generate a random expression; ``depth <= 1`` yields a bare variable."""
        if depth <= 1:
            return Expression(None, rng.choice(variables))

        operator = rng.choice(list(Operator))
        if operator is Operator.NOT:
            return Expression(operator, self._generate_expression(rng, variables, depth - 1))
        left = self._generate_expression(rng, variables, depth - 1)
        right = self._generate_expression(rng, variables, depth - 1)
        return Expression(operator, left, right)

    def _find_valid_conclusion(self, rng: Random, premises: List[Expression], variables: List[str]) -> Expression:
        """Return an expression that is entailed by ``premises``.

        Up to 100 random depth-2 candidates are tried. If none is entailed, the
        fallback is the first premise (a premise always follows from the
        premises), or a tautology when there are no premises at all, so the
        returned conclusion is always valid.
        """
        for _ in range(100):
            candidate = self._generate_expression(rng, variables, 2)
            if self._is_valid_conclusion(premises, candidate):
                return candidate

        if premises:
            return premises[0]
        atom = Expression(None, variables[0])
        return Expression(Operator.OR, atom, Expression(Operator.NOT, atom))

    def _is_valid_conclusion(self, premises: List[Expression], conclusion: Expression) -> bool:
        """Check by truth table whether ``conclusion`` follows from ``premises``."""
        variables = self._collect_variables(premises + [conclusion])

        for assignment in self._generate_assignments(variables):
            # A row where every premise holds but the conclusion fails is a
            # counterexample to entailment.
            if all(p.evaluate(assignment) for p in premises) and not conclusion.evaluate(assignment):
                return False
        return True

    def _collect_variables(self, expressions: List[Expression]) -> Set[str]:
        """Collect the names of all variables used in ``expressions``."""
        found: Set[str] = set()
        for expr in expressions:
            if expr.operator is None:
                found.add(expr.left)
                continue
            children = [child for child in (expr.left, expr.right) if isinstance(child, Expression)]
            found.update(self._collect_variables(children))
        return found

    def _generate_assignments(self, variables: Set[str]) -> List[Dict[str, bool]]:
        """Generate all ``2 ** len(variables)`` truth assignments.

        Variables are taken in sorted order; bit ``j`` of the row number gives
        the value of the ``j``-th variable.
        """
        ordered = sorted(variables)  # sort once, not once per row
        return [
            {var: bool((row >> bit) & 1) for bit, var in enumerate(ordered)}
            for row in range(2 ** len(ordered))
        ]

    def _measure_complexity(self, expression: Expression) -> int:
        """Return the number of nodes (variables and operators) in ``expression``."""
        if expression.operator is None:
            return 1
        if expression.operator is Operator.NOT:
            return 1 + self._measure_complexity(expression.left)
        return 1 + self._measure_complexity(expression.left) + self._measure_complexity(expression.right)


def propositional_logic_dataset(
    min_vars: int = 2,
    max_vars: int = 4,
    min_statements: int = 2,
    max_statements: int = 4,
    max_complexity: int = 3,
    seed: Optional[int] = None,
    size: int = 500,
) -> PropositionalLogicDataset:
    """Create a PropositionalLogicDataset with the given configuration."""
    config = PropositionalLogicConfig(
        min_vars=min_vars,
        max_vars=max_vars,
        min_statements=min_statements,
        max_statements=max_statements,
        max_complexity=max_complexity,
        seed=seed,
        size=size,
    )
    return PropositionalLogicDataset(config)


# ---------------------------------------------------------------------------
# Tests for propositional logic task generation
# ---------------------------------------------------------------------------


def test_propositional_logic_config_validation():
    """Test that invalid configs raise appropriate errors"""
    import pytest  # local import: only the tests need pytest

    with pytest.raises(AssertionError):
        config = PropositionalLogicConfig(min_vars=0)
        config.validate()

    with pytest.raises(AssertionError):
        config = PropositionalLogicConfig(min_vars=4, max_vars=3)
        config.validate()

    with pytest.raises(AssertionError):
        config = PropositionalLogicConfig(min_statements=0)
        config.validate()


def test_expression_evaluation():
    """Test logical expression evaluation"""
    # Test simple variable
    expr = Expression(None, "P")
    assert expr.evaluate({"P": True}) is True
    assert expr.evaluate({"P": False}) is False

    # Test NOT
    expr = Expression(Operator.NOT, Expression(None, "P"))
    assert expr.evaluate({"P": True}) is False
    assert expr.evaluate({"P": False}) is True

    # Test AND
    expr = Expression(Operator.AND, Expression(None, "P"), Expression(None, "Q"))
    assert expr.evaluate({"P": True, "Q": True}) is True
    assert expr.evaluate({"P": True, "Q": False}) is False

    # Test IMPLIES
    expr = Expression(Operator.IMPLIES, Expression(None, "P"), Expression(None, "Q"))
    assert expr.evaluate({"P": True, "Q": False}) is False
    assert expr.evaluate({"P": True, "Q": True}) is True
    assert expr.evaluate({"P": False, "Q": False}) is True


def test_propositional_logic_dataset_deterministic():
    """Test that dataset generates same items with same seed"""
    config = PropositionalLogicConfig(seed=42, size=10)
    dataset1 = PropositionalLogicDataset(config)
    dataset2 = PropositionalLogicDataset(config)

    for i in range(len(dataset1)):
        assert dataset1[i] == dataset2[i]


def test_propositional_logic_dataset_items():
    """Test basic properties of generated items"""
    config = PropositionalLogicConfig(
        min_vars=2, max_vars=3, min_statements=2, max_statements=3, max_complexity=2, size=10, seed=42
    )
    dataset = PropositionalLogicDataset(config)

    for i in range(len(dataset)):
        item = dataset[i]
        assert isinstance(item, dict)
        assert "question" in item
        assert "answer" in item
        assert "metadata" in item
        assert isinstance(item["metadata"]["premises"], list)
        assert isinstance(item["metadata"]["variables"], list)
        assert isinstance(item["metadata"]["complexity"], int)


def test_propositional_logic_dataset_iteration():
    """Test that iteration respects dataset size"""
    config = PropositionalLogicConfig(size=5, seed=42)
    dataset = PropositionalLogicDataset(config)

    items = list(dataset)
    assert len(items) == config.size

    # Test multiple iterations yield same items
    assert items == list(dataset)