reasoning-gym/reasoning_gym/logic/syllogisms.py

266 lines
9.7 KiB
Python

"""Syllogism reasoning task generator"""
from dataclasses import dataclass
from enum import StrEnum
from random import Random
from typing import List, Optional, Tuple
from ..factory import ProceduralDataset, register_dataset
class Quantifier(StrEnum):
ALL = "All"
NO = "No"
SOME = "Some"
SOME_NOT = "Some ... are not"
class Term:
    """Represents a categorical term used in syllogisms.

    Attributes:
        name: Singular form of the term (e.g. ``"dog"``).
        plural: Plural form of the term (e.g. ``"dogs"``).
    """

    def __init__(self, name: str, plural: str):
        self.name = name
        self.plural = plural

    def __repr__(self) -> str:
        """Return a constructor-style representation for logs and test output."""
        return f"{type(self).__name__}(name={self.name!r}, plural={self.plural!r})"
@dataclass
class SyllogismConfig:
    """Configuration for syllogism task generation.

    Call :meth:`validate` to check that the settings are usable before
    generating tasks.
    """

    # Terms to use in syllogisms; None means the dataset's default terms are used
    terms: Optional[List["Term"]] = None
    # Control which quantifiers to use
    allow_all: bool = True
    allow_no: bool = True
    allow_some: bool = True
    allow_some_not: bool = True
    # Whether to include invalid syllogisms as negative examples
    include_invalid: bool = True
    # Percentage of invalid examples if included (0.0 to 1.0)
    invalid_ratio: float = 0.3
    # Seed and item count handed to the dataset base class
    seed: Optional[int] = None
    size: int = 500

    def validate(self) -> None:
        """Validate configuration parameters.

        Raises:
            AssertionError: If every quantifier is disabled, if
                ``invalid_ratio`` lies outside ``[0.0, 1.0]``, or if a custom
                ``terms`` list has fewer than the three entries a syllogism
                needs.
        """
        # NOTE: asserts are kept (rather than raising ValueError) so that
        # existing callers catching AssertionError keep working.
        assert any(
            [self.allow_all, self.allow_no, self.allow_some, self.allow_some_not]
        ), "At least one quantifier type must be allowed"
        assert 0.0 <= self.invalid_ratio <= 1.0, "invalid_ratio must be between 0.0 and 1.0"
        # Generation samples three distinct terms; fail here instead of deep
        # inside random.sample at item-generation time.
        assert self.terms is None or len(self.terms) >= 3, "At least three terms are required"
class SyllogismDataset(ProceduralDataset):
    """Generates syllogism reasoning tasks.

    Each task shows two premises arranged as a chain ("<q1> S are M",
    "<q2> M are P") and asks whether a candidate conclusion ("<qc> S are P")
    logically follows. The expected answer is always derived from
    :meth:`_is_valid_syllogism`, so the label and the logic cannot disagree.
    """

    # Default terms if none provided
    DEFAULT_TERMS = [
        # People
        Term("mortal", "mortals"),
        Term("human", "humans"),
        Term("child", "children"),
        Term("adult", "adults"),
        Term("parent", "parents"),
        Term("grandparent", "grandparents"),
        # Professions
        Term("philosopher", "philosophers"),
        Term("student", "students"),
        Term("teacher", "teachers"),
        Term("doctor", "doctors"),
        Term("scientist", "scientists"),
        Term("artist", "artists"),
        Term("musician", "musicians"),
        Term("writer", "writers"),
        Term("programmer", "programmers"),
        Term("engineer", "engineers"),
        Term("lawyer", "lawyers"),
        Term("chef", "chefs"),
        # Animals
        Term("animal", "animals"),
        Term("mammal", "mammals"),
        Term("dog", "dogs"),
        Term("cat", "cats"),
        Term("bird", "birds"),
        Term("fish", "fish"),
        Term("reptile", "reptiles"),
        Term("insect", "insects"),
        Term("butterfly", "butterflies"),
        Term("bee", "bees"),
        Term("ant", "ants"),
        Term("spider", "spiders"),
        Term("horse", "horses"),
        Term("elephant", "elephants"),
        Term("lion", "lions"),
        Term("tiger", "tigers"),
        Term("whale", "whales"),
        Term("dolphin", "dolphins"),
    ]

    # Valid moods for the chain arrangement
    #     premise1: <q1> S are M,  premise2: <q2> M are P  |-  <qc> S are P
    # keyed by (q1, q2) and mapping to the only conclusion quantifier that
    # follows. Universal statements carry no existential import, so
    # ALL + ALL does not entail SOME.
    _VALID_MOODS = {
        (Quantifier.ALL, Quantifier.ALL): Quantifier.ALL,  # Barbara
        (Quantifier.ALL, Quantifier.NO): Quantifier.NO,  # Celarent
        (Quantifier.SOME, Quantifier.ALL): Quantifier.SOME,  # Darii
        (Quantifier.SOME, Quantifier.NO): Quantifier.SOME_NOT,  # Ferio
    }

    def __init__(self, config: SyllogismConfig):
        """Create the dataset from ``config``.

        ``config.seed`` and ``config.size`` are forwarded to the base class;
        the term pool falls back to :attr:`DEFAULT_TERMS` when ``config.terms``
        is ``None``.
        """
        super().__init__(config=config, seed=config.seed, size=config.size)
        self.terms = self.DEFAULT_TERMS if config.terms is None else config.terms

    def _get_allowed_quantifiers(self) -> List[Quantifier]:
        """Get list of allowed quantifiers based on config.

        The order is fixed (ALL, NO, SOME, SOME_NOT) so that seeded random
        choices over the list are reproducible.
        """
        switches = (
            (self.config.allow_all, Quantifier.ALL),
            (self.config.allow_no, Quantifier.NO),
            (self.config.allow_some, Quantifier.SOME),
            (self.config.allow_some_not, Quantifier.SOME_NOT),
        )
        return [quantifier for enabled, quantifier in switches if enabled]

    def _is_valid_syllogism(
        self,
        premise1: Tuple[Quantifier, Term, Term],
        premise2: Tuple[Quantifier, Term, Term],
        conclusion: Tuple[Quantifier, Term, Term],
    ) -> bool:
        """Check whether ``conclusion`` logically follows from the two premises.

        Each statement is a ``(quantifier, subject, predicate)`` tuple. Only
        the chain arrangement produced by the generator is recognised::

            premise1:   <q1> S are M
            premise2:   <q2> M are P
            conclusion: <qc> S are P

        Any other arrangement of terms returns ``False``.

        For this arrangement exactly four moods are valid:

        * ALL S are M,  ALL M are P  ->  ALL S are P        (Barbara)
        * ALL S are M,  NO M are P   ->  NO S are P         (Celarent)
        * SOME S are M, ALL M are P  ->  SOME S are P       (Darii)
        * SOME S are M, NO M are P   ->  SOME S are not P   (Ferio)

        Everything else is invalid, which covers the usual rules: two negative
        premises, two particular premises, or a universal conclusion drawn
        from a particular premise never yield a valid conclusion. Forms such as
        "NO S are M, ALL M are P -> NO S are P" or
        "ALL S are M, SOME M are P -> SOME S are P" are deliberately rejected;
        they have simple counterexamples (e.g. no cats are dogs, all dogs are
        animals, yet cats are animals).

        Returns:
            ``True`` if the conclusion follows from the premises, else ``False``.
        """
        q1, subject, middle = premise1
        q2, middle_again, predicate = premise2
        qc, concl_subject, concl_predicate = conclusion

        # The mood table only applies when the terms form the S-M / M-P / S-P chain.
        forms_chain = middle == middle_again and concl_subject == subject and concl_predicate == predicate
        if not forms_chain:
            return False

        # Missing key -> None, which never equals a quantifier.
        return self._VALID_MOODS.get((q1, q2)) == qc

    def _format_quantifier_statement(self, quantifier: Quantifier, subject: Term, predicate: Term) -> str:
        """Format a quantified statement in natural language.

        Uses the plural forms of both terms, e.g. "All dogs are mammals" or
        "Some dogs are not cats".
        """
        # SOME_NOT wraps around the subject, so it cannot be a simple prefix.
        if quantifier == Quantifier.SOME_NOT:
            return f"Some {subject.plural} are not {predicate.plural}"
        return f"{quantifier.value} {subject.plural} are {predicate.plural}"

    def _generate_syllogism(self, rng: Random) -> dict:
        """Generate a single syllogism problem.

        Three distinct terms are sampled and every combination of allowed
        quantifiers is classified as valid or invalid. An invalid syllogism is
        requested with probability ``invalid_ratio`` when ``include_invalid``
        is set, otherwise a valid one is requested. If the allowed quantifiers
        cannot produce the requested kind, the other kind is used instead.

        Returns:
            A dict with ``"question"``, ``"answer"`` ("Yes"/"No") and
            ``"metadata"`` (premise/conclusion texts and ``is_valid``). The
            answer always reflects the actual validity of the generated form.
        """
        # Select three different terms
        subject, middle, predicate = rng.sample(self.terms, 3)
        quantifiers = self._get_allowed_quantifiers()

        # Decide if this should be a valid or invalid syllogism
        want_invalid = self.config.include_invalid and rng.random() < self.config.invalid_ratio

        # Classify all (at most 4**3 = 64) quantifier combinations. This
        # replaces open-ended resampling, which could loop forever when no
        # combination of the requested kind exists.
        valid_forms = []
        invalid_forms = []
        for q1 in quantifiers:
            for q2 in quantifiers:
                for qc in quantifiers:
                    form = (
                        (q1, subject, middle),
                        (q2, middle, predicate),
                        (qc, subject, predicate),
                    )
                    if self._is_valid_syllogism(*form):
                        valid_forms.append(form)
                    else:
                        invalid_forms.append(form)

        if want_invalid:
            pool = invalid_forms or valid_forms
        else:
            pool = valid_forms or invalid_forms
        premise1, premise2, conclusion = rng.choice(pool)

        # Label from the checker rather than from the request, so a fallback
        # to the other pool can never produce a wrong answer.
        is_valid = self._is_valid_syllogism(premise1, premise2, conclusion)

        # Format the syllogism as text
        premise1_text = self._format_quantifier_statement(*premise1)
        premise2_text = self._format_quantifier_statement(*premise2)
        conclusion_text = self._format_quantifier_statement(*conclusion)

        question = (
            f"Consider these statements:\n"
            f"1. {premise1_text}\n"
            f"2. {premise2_text}\n\n"
            f"Does it logically follow that:\n"
            f"{conclusion_text}?\n"
            f"(Answer Yes or No)"
        )

        return {
            "question": question,
            "answer": "Yes" if is_valid else "No",
            "metadata": {
                "premise1": premise1_text,
                "premise2": premise2_text,
                "conclusion": conclusion_text,
                "is_valid": is_valid,
            },
        }

    def __getitem__(self, idx: int) -> dict:
        """Generate a single syllogism task.

        The item is derived from a generator seeded with ``self.seed + idx``,
        so the same index always yields the same task for a given seed.
        """
        # NOTE(review): relies on self.seed being an int here; presumably the
        # base class substitutes a value when config.seed is None - confirm.
        rng = Random(self.seed + idx)
        return self._generate_syllogism(rng)
# Pass the dataset class and its config class to the factory's
# register_dataset under the name "syllogism" (presumably so the dataset can
# be created by name - see ..factory for the exact lookup semantics).
register_dataset("syllogism", SyllogismDataset, SyllogismConfig)