feat: Add propositional logic dataset generator with comprehensive logical reasoning tasks

This commit is contained in:
Andreas Koepf (aider) 2025-01-23 14:35:54 +01:00 committed by Andreas Koepf
parent 03a3bcf059
commit fa68760784
4 changed files with 652 additions and 0 deletions

331
python
View file

@ -315,3 +315,334 @@ def test_sequence_dataset_iteration():
# Test multiple iterations yield same items
assert items == list(dataset)
"""Propositional logic task generator"""
from dataclasses import dataclass
from enum import Enum
from random import Random
from typing import Any, List, Optional, Set, Tuple
class Operator(Enum):
    """Logical connectives; each member's value is the symbol used when rendering."""

    AND = "∧"
    # Restored symbol: an empty value rendered disjunctions as "(P  Q)".
    OR = "∨"
    NOT = "¬"
    IMPLIES = "→"
    IFF = "↔"
@dataclass
class PropositionalLogicConfig:
    """Configuration for propositional logic task generation.

    The dataclass itself accepts any values; call ``validate`` to enforce the
    constraints between the fields.
    """

    min_vars: int = 2  # Minimum number of variables
    max_vars: int = 4  # Maximum number of variables
    min_statements: int = 2  # Minimum number of given statements
    max_statements: int = 4  # Maximum number of statements
    max_complexity: int = 3  # Maximum operator depth
    seed: Optional[int] = None
    size: int = 500  # Virtual dataset size

    def validate(self) -> None:
        """Validate configuration parameters.

        Raises:
            AssertionError: if a minimum is not positive, a maximum is below
                its minimum, or ``max_complexity`` is not positive.
        """
        # Raised explicitly instead of using `assert` statements so the checks
        # are not stripped when Python runs with -O; the exception type is
        # unchanged for existing callers.
        if not self.min_vars > 0:
            raise AssertionError("min_vars must be positive")
        if not self.max_vars >= self.min_vars:
            raise AssertionError("max_vars must be >= min_vars")
        if not self.min_statements > 0:
            raise AssertionError("min_statements must be positive")
        if not self.max_statements >= self.min_statements:
            raise AssertionError("max_statements must be >= min_statements")
        if not self.max_complexity > 0:
            raise AssertionError("max_complexity must be positive")
class Expression:
    """A node in a propositional formula tree.

    A node whose ``operator`` is ``None`` is a variable and keeps its name in
    ``left``. A NOT node keeps its operand in ``left``. Every other operator
    uses both ``left`` and ``right`` as sub-expressions.
    """

    def __init__(self, operator: Optional[Operator], left: Any, right: Optional[Any] = None):
        self.operator, self.left, self.right = operator, left, right

    def evaluate(self, assignments: dict[str, bool]) -> bool:
        """Return the truth value of this node under ``assignments``.

        Raises:
            ValueError: if ``operator`` is not ``None`` or a known ``Operator``.
        """
        op = self.operator
        if op is None:
            # Leaf node: look the variable up by name.
            return assignments[self.left]
        if op == Operator.NOT:
            return not self.left.evaluate(assignments)
        if op == Operator.AND:
            return self.left.evaluate(assignments) and self.right.evaluate(assignments)
        if op == Operator.OR:
            return self.left.evaluate(assignments) or self.right.evaluate(assignments)
        if op == Operator.IMPLIES:
            # Material implication: false only when left holds and right does not.
            return (not self.left.evaluate(assignments)) or self.right.evaluate(assignments)
        if op == Operator.IFF:
            return self.left.evaluate(assignments) == self.right.evaluate(assignments)
        raise ValueError(f"Unknown operator: {self.operator}")

    def __str__(self) -> str:
        op = self.operator
        if op is None:
            return self.left
        if op == Operator.NOT:
            return f"{op.value}{self.left}"
        # Binary operators are always parenthesised.
        return f"({self.left} {op.value} {self.right})"
class PropositionalLogicDataset:
    """Generates propositional logic reasoning tasks.

    Items are produced on demand: item ``idx`` is built from a ``Random``
    seeded with ``seed + idx``, so the same seed and index always give the
    same task.
    """

    def __init__(self, config: PropositionalLogicConfig):
        self.config = config
        self.config.validate()
        # With no seed configured, draw one once so repeated access to this
        # instance still yields the same items.
        self.seed = config.seed if config.seed is not None else Random().randint(0, 2**32)

    def __len__(self) -> int:
        return self.config.size

    def __iter__(self):
        # Every new iteration restarts from the first item.
        self._current_idx = 0
        return self

    def __next__(self):
        if self._current_idx >= self.config.size:
            raise StopIteration
        item = self[self._current_idx]
        self._current_idx += 1
        return item

    def __getitem__(self, idx: int) -> dict[str, Any]:
        """Generate a single propositional logic task.

        Returns:
            A dict with the keys "question" (numbered premises followed by a
            prompt), "answer" (the rendered conclusion) and "metadata"
            (rendered premises, variable names, conclusion complexity).
        """
        rng = Random(self.seed + idx)

        # Variables are named with consecutive letters starting at "P".
        num_vars = rng.randint(self.config.min_vars, self.config.max_vars)
        variables = [chr(ord("P") + i) for i in range(num_vars)]

        num_statements = rng.randint(self.config.min_statements, self.config.max_statements)
        premises = self._generate_premises(rng, variables, num_statements)

        conclusion = self._find_valid_conclusion(rng, premises, variables)

        numbered = "".join(f"{i}. {premise}\n" for i, premise in enumerate(premises, 1))
        question = f"Given:\n{numbered}What can we conclude?"

        return {
            "question": question,
            "answer": str(conclusion),
            "metadata": {
                "premises": [str(p) for p in premises],
                "variables": variables,
                "complexity": self._measure_complexity(conclusion),
            },
        }

    def _generate_premises(self, rng: Random, variables: List[str], num_statements: int) -> List[Expression]:
        """Generate ``num_statements`` premises, each with its own random depth."""
        premises = []
        for _ in range(num_statements):
            depth = rng.randint(1, self.config.max_complexity)
            premises.append(self._generate_expression(rng, variables, depth))
        return premises

    def _generate_expression(self, rng: Random, variables: List[str], depth: int) -> Expression:
        """Generate a random expression tree; ``depth <= 1`` yields a bare variable."""
        if depth <= 1:
            return Expression(None, rng.choice(variables))

        operator = rng.choice(list(Operator))
        if operator == Operator.NOT:
            return Expression(operator, self._generate_expression(rng, variables, depth - 1))

        left = self._generate_expression(rng, variables, depth - 1)
        right = self._generate_expression(rng, variables, depth - 1)
        return Expression(operator, left, right)

    def _find_valid_conclusion(self, rng: Random, premises: List[Expression], variables: List[str]) -> Expression:
        """Return an expression that follows from ``premises``.

        Samples up to 100 random depth-2 candidates and returns the first one
        that passes the truth-table check.
        """
        for _ in range(100):
            candidate = self._generate_expression(rng, variables, 2)
            if self._is_valid_conclusion(premises, candidate):
                return candidate

        # No sampled candidate was entailed. A premise always follows from the
        # premise set, so restating the first one keeps the answer valid; an
        # arbitrary variable (the previous fallback) gives no such guarantee.
        if premises:
            return premises[0]
        return Expression(None, variables[0])

    def _is_valid_conclusion(self, premises: List[Expression], conclusion: Expression) -> bool:
        """Check by truth table whether ``conclusion`` follows from ``premises``."""
        variables = self._collect_variables(premises + [conclusion])
        for assignment in self._generate_assignments(variables):
            # A counterexample makes all premises true and the conclusion false.
            if all(p.evaluate(assignment) for p in premises) and not conclusion.evaluate(assignment):
                return False
        return True

    def _collect_variables(self, expressions: List[Expression]) -> Set[str]:
        """Collect the names of all variables used in ``expressions``."""
        variables = set()
        for expr in expressions:
            if expr.operator is None:
                variables.add(expr.left)
                continue
            children = [child for child in (expr.left, expr.right) if isinstance(child, Expression)]
            variables.update(self._collect_variables(children))
        return variables

    def _generate_assignments(self, variables: Set[str]) -> List[dict[str, bool]]:
        """Generate every truth assignment over ``variables``."""
        ordered = sorted(variables)  # hoisted: the order is the same for every row
        assignments = []
        for i in range(2 ** len(ordered)):
            # Bit j of the row index is the value of the j-th variable.
            assignments.append({var: bool((i >> j) & 1) for j, var in enumerate(ordered)})
        return assignments

    def _measure_complexity(self, expression: Expression) -> int:
        """Count the nodes (variables and operators) in ``expression``."""
        if expression.operator is None:
            return 1
        if expression.operator == Operator.NOT:
            return 1 + self._measure_complexity(expression.left)
        return 1 + self._measure_complexity(expression.left) + self._measure_complexity(expression.right)
def propositional_logic_dataset(
    min_vars: int = 2,
    max_vars: int = 4,
    min_statements: int = 2,
    max_statements: int = 4,
    max_complexity: int = 3,
    seed: Optional[int] = None,
    size: int = 500,
) -> PropositionalLogicDataset:
    """Build a PropositionalLogicDataset from individual configuration values.

    Each argument is passed to the ``PropositionalLogicConfig`` field of the
    same name.
    """
    return PropositionalLogicDataset(
        PropositionalLogicConfig(
            min_vars=min_vars,
            max_vars=max_vars,
            min_statements=min_statements,
            max_statements=max_statements,
            max_complexity=max_complexity,
            seed=seed,
            size=size,
        )
    )
"""Tests for propositional logic task generation"""
import pytest
from reasoning_gym.logic.propositional_logic import (
Expression,
Operator,
PropositionalLogicConfig,
PropositionalLogicDataset,
)
def test_propositional_logic_config_validation():
    """Each out-of-range configuration must fail validation."""
    invalid_kwargs = (
        {"min_vars": 0},
        {"min_vars": 4, "max_vars": 3},
        {"min_statements": 0},
    )
    for kwargs in invalid_kwargs:
        with pytest.raises(AssertionError):
            PropositionalLogicConfig(**kwargs).validate()
def test_expression_evaluation():
    """Check variable, NOT, AND and IMPLIES nodes against expected truth values."""
    p = Expression(None, "P")
    q = Expression(None, "Q")

    # A bare variable mirrors its assignment.
    assert p.evaluate({"P": True}) is True
    assert p.evaluate({"P": False}) is False

    # NOT flips the operand.
    negation = Expression(Operator.NOT, p)
    assert negation.evaluate({"P": True}) is False
    assert negation.evaluate({"P": False}) is True

    # AND needs both sides to hold.
    conjunction = Expression(Operator.AND, p, q)
    assert conjunction.evaluate({"P": True, "Q": True}) is True
    assert conjunction.evaluate({"P": True, "Q": False}) is False

    # IMPLIES is false only for a true antecedent with a false consequent.
    implication = Expression(Operator.IMPLIES, p, q)
    assert implication.evaluate({"P": True, "Q": False}) is False
    assert implication.evaluate({"P": True, "Q": True}) is True
    assert implication.evaluate({"P": False, "Q": False}) is True
def test_propositional_logic_dataset_deterministic():
    """Two datasets built from the same seeded config must produce equal items."""
    config = PropositionalLogicConfig(seed=42, size=10)
    first = PropositionalLogicDataset(config)
    second = PropositionalLogicDataset(config)
    for idx in range(len(first)):
        assert first[idx] == second[idx]
def test_propositional_logic_dataset_items():
    """Every generated item has the expected keys and metadata types."""
    config = PropositionalLogicConfig(
        min_vars=2, max_vars=3, min_statements=2, max_statements=3, max_complexity=2, size=10, seed=42
    )
    dataset = PropositionalLogicDataset(config)

    for idx in range(len(dataset)):
        item = dataset[idx]
        assert isinstance(item, dict)
        assert "question" in item
        assert "answer" in item
        assert "metadata" in item

        metadata = item["metadata"]
        assert isinstance(metadata["premises"], list)
        assert isinstance(metadata["variables"], list)
        assert isinstance(metadata["complexity"], int)
def test_propositional_logic_dataset_iteration():
    """Iterating yields exactly ``size`` items and is repeatable."""
    config = PropositionalLogicConfig(size=5, seed=42)
    dataset = PropositionalLogicDataset(config)

    first_pass = list(dataset)
    assert len(first_pass) == config.size

    # A second pass over the same dataset must reproduce the first.
    second_pass = list(dataset)
    assert first_pass == second_pass
"""
Logic tasks for training reasoning capabilities:
- Propositional logic
- Predicate logic
- Set theory
- Syllogisms
"""
from .propositional_logic import PropositionalLogicConfig, PropositionalLogicDataset, propositional_logic_dataset
__all__ = ["PropositionalLogicConfig", "PropositionalLogicDataset", "propositional_logic_dataset"]

View file

@ -0,0 +1,11 @@
"""
Logic tasks for training reasoning capabilities:
- Propositional logic
- Predicate logic
- Set theory
- Syllogisms
"""
from .propositional_logic import PropositionalLogicConfig, PropositionalLogicDataset, propositional_logic_dataset
__all__ = ["PropositionalLogicConfig", "PropositionalLogicDataset", "propositional_logic_dataset"]

View file

@ -0,0 +1,221 @@
"""Propositional logic task generator"""
from dataclasses import dataclass
from enum import Enum
from random import Random
from typing import Any, List, Optional, Set, Tuple
class Operator(Enum):
    """Logical connectives; each member's value is the symbol used when rendering."""

    # The values must be distinct: Enum turns members with equal values into
    # aliases, so the previously empty strings made OR, IMPLIES and IFF all
    # resolve to AND.
    AND = "∧"
    OR = "∨"
    NOT = "¬"
    IMPLIES = "→"
    IFF = "↔"
@dataclass
class PropositionalLogicConfig:
    """Configuration for propositional logic task generation.

    The dataclass itself accepts any values; call ``validate`` to enforce the
    constraints between the fields.
    """

    min_vars: int = 2  # Minimum number of variables
    max_vars: int = 4  # Maximum number of variables
    min_statements: int = 2  # Minimum number of given statements
    max_statements: int = 4  # Maximum number of statements
    max_complexity: int = 3  # Maximum operator depth
    seed: Optional[int] = None
    size: int = 500  # Virtual dataset size

    def validate(self) -> None:
        """Validate configuration parameters.

        Raises:
            AssertionError: if a minimum is not positive, a maximum is below
                its minimum, or ``max_complexity`` is not positive.
        """
        # Raised explicitly instead of using `assert` statements so the checks
        # are not stripped when Python runs with -O; the exception type is
        # unchanged for existing callers.
        if not self.min_vars > 0:
            raise AssertionError("min_vars must be positive")
        if not self.max_vars >= self.min_vars:
            raise AssertionError("max_vars must be >= min_vars")
        if not self.min_statements > 0:
            raise AssertionError("min_statements must be positive")
        if not self.max_statements >= self.min_statements:
            raise AssertionError("max_statements must be >= min_statements")
        if not self.max_complexity > 0:
            raise AssertionError("max_complexity must be positive")
class Expression:
    """A node in a propositional formula tree.

    A node whose ``operator`` is ``None`` is a variable and keeps its name in
    ``left``. A NOT node keeps its operand in ``left``. Every other operator
    uses both ``left`` and ``right`` as sub-expressions.
    """

    def __init__(self, operator: Optional[Operator], left: Any, right: Optional[Any] = None):
        self.operator, self.left, self.right = operator, left, right

    def evaluate(self, assignments: dict[str, bool]) -> bool:
        """Return the truth value of this node under ``assignments``.

        Raises:
            ValueError: if ``operator`` is not ``None`` or a known ``Operator``.
        """
        op = self.operator
        if op is None:
            # Leaf node: look the variable up by name.
            return assignments[self.left]
        if op == Operator.NOT:
            return not self.left.evaluate(assignments)
        if op == Operator.AND:
            return self.left.evaluate(assignments) and self.right.evaluate(assignments)
        if op == Operator.OR:
            return self.left.evaluate(assignments) or self.right.evaluate(assignments)
        if op == Operator.IMPLIES:
            # Material implication: false only when left holds and right does not.
            return (not self.left.evaluate(assignments)) or self.right.evaluate(assignments)
        if op == Operator.IFF:
            return self.left.evaluate(assignments) == self.right.evaluate(assignments)
        raise ValueError(f"Unknown operator: {self.operator}")

    def __str__(self) -> str:
        op = self.operator
        if op is None:
            return self.left
        if op == Operator.NOT:
            return f"{op.value}{self.left}"
        # Binary operators are always parenthesised.
        return f"({self.left} {op.value} {self.right})"
class PropositionalLogicDataset:
    """Generates propositional logic reasoning tasks.

    Items are produced on demand: item ``idx`` is built from a ``Random``
    seeded with ``seed + idx``, so the same seed and index always give the
    same task.
    """

    def __init__(self, config: PropositionalLogicConfig):
        self.config = config
        self.config.validate()
        # With no seed configured, draw one once so repeated access to this
        # instance still yields the same items.
        self.seed = config.seed if config.seed is not None else Random().randint(0, 2**32)

    def __len__(self) -> int:
        return self.config.size

    def __iter__(self):
        # Every new iteration restarts from the first item.
        self._current_idx = 0
        return self

    def __next__(self):
        if self._current_idx >= self.config.size:
            raise StopIteration
        item = self[self._current_idx]
        self._current_idx += 1
        return item

    def __getitem__(self, idx: int) -> dict[str, Any]:
        """Generate a single propositional logic task.

        Returns:
            A dict with the keys "question" (numbered premises followed by a
            prompt), "answer" (the rendered conclusion) and "metadata"
            (rendered premises, variable names, conclusion complexity).
        """
        rng = Random(self.seed + idx)

        # Variables are named with consecutive letters starting at "P".
        num_vars = rng.randint(self.config.min_vars, self.config.max_vars)
        variables = [chr(ord("P") + i) for i in range(num_vars)]

        num_statements = rng.randint(self.config.min_statements, self.config.max_statements)
        premises = self._generate_premises(rng, variables, num_statements)

        conclusion = self._find_valid_conclusion(rng, premises, variables)

        numbered = "".join(f"{i}. {premise}\n" for i, premise in enumerate(premises, 1))
        question = f"Given:\n{numbered}What can we conclude?"

        return {
            "question": question,
            "answer": str(conclusion),
            "metadata": {
                "premises": [str(p) for p in premises],
                "variables": variables,
                "complexity": self._measure_complexity(conclusion),
            },
        }

    def _generate_premises(self, rng: Random, variables: List[str], num_statements: int) -> List[Expression]:
        """Generate ``num_statements`` premises, each with its own random depth."""
        premises = []
        for _ in range(num_statements):
            depth = rng.randint(1, self.config.max_complexity)
            premises.append(self._generate_expression(rng, variables, depth))
        return premises

    def _generate_expression(self, rng: Random, variables: List[str], depth: int) -> Expression:
        """Generate a random expression tree; ``depth <= 1`` yields a bare variable."""
        if depth <= 1:
            return Expression(None, rng.choice(variables))

        operator = rng.choice(list(Operator))
        if operator == Operator.NOT:
            return Expression(operator, self._generate_expression(rng, variables, depth - 1))

        left = self._generate_expression(rng, variables, depth - 1)
        right = self._generate_expression(rng, variables, depth - 1)
        return Expression(operator, left, right)

    def _find_valid_conclusion(self, rng: Random, premises: List[Expression], variables: List[str]) -> Expression:
        """Return an expression that follows from ``premises``.

        Samples up to 100 random depth-2 candidates and returns the first one
        that passes the truth-table check.
        """
        for _ in range(100):
            candidate = self._generate_expression(rng, variables, 2)
            if self._is_valid_conclusion(premises, candidate):
                return candidate

        # No sampled candidate was entailed. A premise always follows from the
        # premise set, so restating the first one keeps the answer valid; an
        # arbitrary variable (the previous fallback) gives no such guarantee.
        if premises:
            return premises[0]
        return Expression(None, variables[0])

    def _is_valid_conclusion(self, premises: List[Expression], conclusion: Expression) -> bool:
        """Check by truth table whether ``conclusion`` follows from ``premises``."""
        variables = self._collect_variables(premises + [conclusion])
        for assignment in self._generate_assignments(variables):
            # A counterexample makes all premises true and the conclusion false.
            if all(p.evaluate(assignment) for p in premises) and not conclusion.evaluate(assignment):
                return False
        return True

    def _collect_variables(self, expressions: List[Expression]) -> Set[str]:
        """Collect the names of all variables used in ``expressions``."""
        variables = set()
        for expr in expressions:
            if expr.operator is None:
                variables.add(expr.left)
                continue
            children = [child for child in (expr.left, expr.right) if isinstance(child, Expression)]
            variables.update(self._collect_variables(children))
        return variables

    def _generate_assignments(self, variables: Set[str]) -> List[dict[str, bool]]:
        """Generate every truth assignment over ``variables``."""
        ordered = sorted(variables)  # hoisted: the order is the same for every row
        assignments = []
        for i in range(2 ** len(ordered)):
            # Bit j of the row index is the value of the j-th variable.
            assignments.append({var: bool((i >> j) & 1) for j, var in enumerate(ordered)})
        return assignments

    def _measure_complexity(self, expression: Expression) -> int:
        """Count the nodes (variables and operators) in ``expression``."""
        if expression.operator is None:
            return 1
        if expression.operator == Operator.NOT:
            return 1 + self._measure_complexity(expression.left)
        return 1 + self._measure_complexity(expression.left) + self._measure_complexity(expression.right)
def propositional_logic_dataset(
    min_vars: int = 2,
    max_vars: int = 4,
    min_statements: int = 2,
    max_statements: int = 4,
    max_complexity: int = 3,
    seed: Optional[int] = None,
    size: int = 500,
) -> PropositionalLogicDataset:
    """Build a PropositionalLogicDataset from individual configuration values.

    Each argument is passed to the ``PropositionalLogicConfig`` field of the
    same name.
    """
    return PropositionalLogicDataset(
        PropositionalLogicConfig(
            min_vars=min_vars,
            max_vars=max_vars,
            min_statements=min_statements,
            max_statements=max_statements,
            max_complexity=max_complexity,
            seed=seed,
            size=size,
        )
    )

View file

@ -0,0 +1,89 @@
"""Tests for propositional logic task generation"""
import pytest
from reasoning_gym.logic.propositional_logic import (
Expression,
Operator,
PropositionalLogicConfig,
PropositionalLogicDataset,
)
def test_propositional_logic_config_validation():
    """Each out-of-range configuration must fail validation."""
    invalid_kwargs = (
        {"min_vars": 0},
        {"min_vars": 4, "max_vars": 3},
        {"min_statements": 0},
    )
    for kwargs in invalid_kwargs:
        with pytest.raises(AssertionError):
            PropositionalLogicConfig(**kwargs).validate()
def test_expression_evaluation():
    """Check variable, NOT, AND and IMPLIES nodes against expected truth values."""
    p = Expression(None, "P")
    q = Expression(None, "Q")

    # A bare variable mirrors its assignment.
    assert p.evaluate({"P": True}) is True
    assert p.evaluate({"P": False}) is False

    # NOT flips the operand.
    negation = Expression(Operator.NOT, p)
    assert negation.evaluate({"P": True}) is False
    assert negation.evaluate({"P": False}) is True

    # AND needs both sides to hold.
    conjunction = Expression(Operator.AND, p, q)
    assert conjunction.evaluate({"P": True, "Q": True}) is True
    assert conjunction.evaluate({"P": True, "Q": False}) is False

    # IMPLIES is false only for a true antecedent with a false consequent.
    implication = Expression(Operator.IMPLIES, p, q)
    assert implication.evaluate({"P": True, "Q": False}) is False
    assert implication.evaluate({"P": True, "Q": True}) is True
    assert implication.evaluate({"P": False, "Q": False}) is True
def test_propositional_logic_dataset_deterministic():
    """Two datasets built from the same seeded config must produce equal items."""
    config = PropositionalLogicConfig(seed=42, size=10)
    first = PropositionalLogicDataset(config)
    second = PropositionalLogicDataset(config)
    for idx in range(len(first)):
        assert first[idx] == second[idx]
def test_propositional_logic_dataset_items():
    """Every generated item has the expected keys and metadata types."""
    config = PropositionalLogicConfig(
        min_vars=2,
        max_vars=3,
        min_statements=2,
        max_statements=3,
        max_complexity=2,
        size=10,
        seed=42,
    )
    dataset = PropositionalLogicDataset(config)

    for idx in range(len(dataset)):
        item = dataset[idx]
        assert isinstance(item, dict)
        assert "question" in item
        assert "answer" in item
        assert "metadata" in item

        metadata = item["metadata"]
        assert isinstance(metadata["premises"], list)
        assert isinstance(metadata["variables"], list)
        assert isinstance(metadata["complexity"], int)
def test_propositional_logic_dataset_iteration():
    """Iterating yields exactly ``size`` items and is repeatable."""
    config = PropositionalLogicConfig(size=5, seed=42)
    dataset = PropositionalLogicDataset(config)

    first_pass = list(dataset)
    assert len(first_pass) == config.size

    # A second pass over the same dataset must reproduce the first.
    second_pass = list(dataset)
    assert first_pass == second_pass