reasoning-gym/reasoning_gym/logic/syllogisms.py

"""Syllogism reasoning task generator"""

from dataclasses import dataclass
from enum import StrEnum
from random import Random
from typing import List, Optional, Tuple

from ..factory import ProceduralDataset, register_dataset


class Quantifier(StrEnum):
    """Quantifiers of the four categorical statement forms used in syllogisms."""

    ALL = "All"  # universal affirmative: "All S are P"
    NO = "No"  # universal negative: "No S are P"
    SOME = "Some"  # particular affirmative: "Some S are P"
    # Particular negative: "Some S are not P". The "..." only marks where the subject
    # goes; SyllogismDataset._format_quantifier_statement special-cases this member
    # instead of interpolating its value.
    SOME_NOT = "Some ... are not"


class Term:
    """Represents a categorical term used in syllogisms

    Attributes:
        name: Singular form of the term, e.g. ``"child"``.
        plural: Plural form, e.g. ``"children"``; this is the form that is
            rendered into statements.

    Terms compare and hash by value (``name`` and ``plural``), so two
    separately constructed but identical terms are interchangeable. Avoid
    mutating a term while it is stored in a set or used as a dict key.
    """

    def __init__(self, name: str, plural: str):
        self.name = name
        self.plural = plural

    def __repr__(self) -> str:
        return f"{type(self).__name__}(name={self.name!r}, plural={self.plural!r})"

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Term):
            return NotImplemented
        return (self.name, self.plural) == (other.name, other.plural)

    def __hash__(self) -> int:
        # Kept consistent with __eq__ so terms remain usable in sets and as dict keys
        return hash((self.name, self.plural))


@dataclass
class SyllogismConfig:
    """Configuration for syllogism task generation"""

    # Terms to use in syllogisms; None means the dataset falls back to its built-in defaults.
    # When provided, at least three terms are needed (one syllogism uses three of them).
    terms: Optional[List[Term]] = None

    # Control which quantifiers to use
    allow_all: bool = True
    allow_no: bool = True
    allow_some: bool = True
    allow_some_not: bool = True

    # Whether to include invalid syllogisms as negative examples
    include_invalid: bool = True

    # Fraction of invalid examples if included (0.0 to 1.0)
    invalid_ratio: float = 0.3

    # Base seed for item generation and number of items in the dataset
    seed: Optional[int] = None
    size: int = 500

    def validate(self) -> None:
        """Validate configuration parameters

        Raises:
            AssertionError: if no quantifier is enabled, if ``invalid_ratio`` is
                outside ``[0.0, 1.0]``, or if fewer than three terms are supplied.
        """
        # NOTE: assertions are kept (rather than raising ValueError) so callers that
        # catch AssertionError keep working; they are skipped when running with -O.
        assert any(
            [self.allow_all, self.allow_no, self.allow_some, self.allow_some_not]
        ), "At least one quantifier type must be allowed"
        assert 0.0 <= self.invalid_ratio <= 1.0, "invalid_ratio must be between 0.0 and 1.0"
        # A syllogism samples three terms; fail here instead of deep inside generation
        assert self.terms is None or len(self.terms) >= 3, "At least three terms are required"


class SyllogismDataset(ProceduralDataset):
    """Generates syllogism reasoning tasks

    Each item is a dict with a natural-language ``question`` (two premises and a
    candidate conclusion), an ``answer`` of ``"Yes"`` or ``"No"`` and a
    ``metadata`` dict holding the rendered statements and the ``is_valid`` flag.

    Every generated syllogism uses the same term arrangement, where S, M and P
    are three terms sampled without replacement::

        premise 1:   <q1> S are M
        premise 2:   <q2> M are P
        conclusion:  <qc> S are P
    """

    # Default terms if none provided
    DEFAULT_TERMS = [
        # People
        Term("mortal", "mortals"),
        Term("human", "humans"),
        Term("child", "children"),
        Term("adult", "adults"),
        Term("parent", "parents"),
        Term("grandparent", "grandparents"),
        # Professions
        Term("philosopher", "philosophers"),
        Term("student", "students"),
        Term("teacher", "teachers"),
        Term("doctor", "doctors"),
        Term("scientist", "scientists"),
        Term("artist", "artists"),
        Term("musician", "musicians"),
        Term("writer", "writers"),
        Term("programmer", "programmers"),
        Term("engineer", "engineers"),
        Term("lawyer", "lawyers"),
        Term("chef", "chefs"),
        # Animals
        Term("animal", "animals"),
        Term("mammal", "mammals"),
        Term("dog", "dogs"),
        Term("cat", "cats"),
        Term("bird", "birds"),
        Term("fish", "fish"),
        Term("reptile", "reptiles"),
        Term("insect", "insects"),
        Term("butterfly", "butterflies"),
        Term("bee", "bees"),
        Term("ant", "ants"),
        Term("spider", "spiders"),
        Term("horse", "horses"),
        Term("elephant", "elephants"),
        Term("lion", "lions"),
        Term("tiger", "tigers"),
        Term("whale", "whales"),
        Term("dolphin", "dolphins"),
    ]

    # Valid moods for the arrangement "q1 S are M" + "q2 M are P" => "qc S are P",
    # keyed by (q1, q2) and mapping to the only conclusion quantifier that follows.
    # No existential import is assumed, so the weakened forms (e.g. ALL + ALL => SOME)
    # are deliberately absent.
    _VALID_MOODS = {
        (Quantifier.ALL, Quantifier.ALL): Quantifier.ALL,  # Barbara
        (Quantifier.ALL, Quantifier.NO): Quantifier.NO,  # Celarent
        (Quantifier.SOME, Quantifier.ALL): Quantifier.SOME,  # Darii
        (Quantifier.SOME, Quantifier.NO): Quantifier.SOME_NOT,  # Ferio
    }

    def __init__(self, config: SyllogismConfig):
        """Create the dataset from ``config``, using DEFAULT_TERMS when no terms are given"""
        # NOTE(review): the base class presumably stores config/seed/size as attributes
        # (self.config and self.seed are read below) — confirm against ProceduralDataset.
        super().__init__(config=config, seed=config.seed, size=config.size)
        self.terms = self.DEFAULT_TERMS if config.terms is None else config.terms

    def _get_allowed_quantifiers(self) -> List[Quantifier]:
        """Get list of allowed quantifiers based on config"""
        quantifiers = []
        if self.config.allow_all:
            quantifiers.append(Quantifier.ALL)
        if self.config.allow_no:
            quantifiers.append(Quantifier.NO)
        if self.config.allow_some:
            quantifiers.append(Quantifier.SOME)
        if self.config.allow_some_not:
            quantifiers.append(Quantifier.SOME_NOT)
        return quantifiers

    def _is_valid_syllogism(
        self,
        premise1: Tuple[Quantifier, Term, Term],
        premise2: Tuple[Quantifier, Term, Term],
        conclusion: Tuple[Quantifier, Term, Term],
    ) -> bool:
        """
        Check if a syllogism is logically valid.

        Each statement is a ``(quantifier, subject, predicate)`` tuple. Only the
        chained arrangement is recognised, i.e. the predicate of premise 1 is the
        subject of premise 2, and the conclusion links the subject of premise 1 to
        the predicate of premise 2. Any other arrangement returns False.

        Valid forms (S = subject, M = middle term, P = predicate):
        1. ALL S are M  + ALL M are P → ALL S are P       (Barbara)
        2. ALL S are M  + NO M are P  → NO S are P        (Celarent)
        3. SOME S are M + ALL M are P → SOME S are P      (Darii)
        4. SOME S are M + NO M are P  → SOME S are not P  (Ferio)

        Everything else is invalid. In particular:
           - Two negative premises never yield a valid conclusion
           - Two particular premises never yield a valid conclusion
           - A universal conclusion never follows from a particular premise
           - NO S are M + ALL M are P proves nothing about S and P
           - ALL S are M + SOME M are (not) P proves nothing about S and P
           - SOME S are not M + ALL M are P proves nothing about S and P

        Returns:
            True if the conclusion necessarily follows from the premises.
        """
        q1, t1_1, t1_2 = premise1
        q2, t2_1, t2_2 = premise2
        qc, tc_1, tc_2 = conclusion

        # Terms must form the chain S-M, M-P => S-P for the mood table to apply
        if not (t1_2 == t2_1 and tc_1 == t1_1 and tc_2 == t2_2):
            return False

        # A premise pair missing from the table supports no conclusion at all
        return self._VALID_MOODS.get((q1, q2)) == qc

    def _format_quantifier_statement(self, quantifier: Quantifier, subject: Term, predicate: Term) -> str:
        """Format a quantified statement in natural language"""
        if quantifier == Quantifier.SOME_NOT:
            # The negation sits between subject and predicate, so the enum value cannot be used as a prefix
            return f"Some {subject.plural} are not {predicate.plural}"
        else:
            return f"{quantifier.value} {subject.plural} are {predicate.plural}"

    def _generate_syllogism(self, rng: Random) -> dict:
        """Generate a single syllogism problem

        With probability ``invalid_ratio`` (and only if ``include_invalid`` is set)
        an invalid syllogism is requested, otherwise a valid one. If the allowed
        quantifiers cannot produce the requested kind, the other kind is generated
        instead. The answer always reflects the actual validity of the item.

        Returns:
            Dict with ``question``, ``answer`` ("Yes"/"No") and ``metadata``.
        """
        # Select three different terms
        subject, middle, predicate = rng.sample(self.terms, 3)
        quantifiers = self._get_allowed_quantifiers()

        # Decide if this should be a valid or invalid syllogism
        want_valid = not (self.config.include_invalid and rng.random() < self.config.invalid_ratio)

        # Enumerate every allowed quantifier combination and split by validity; this
        # replaces open-ended rejection sampling, which could loop forever when the
        # requested kind does not exist for the allowed quantifiers.
        pools = {True: [], False: []}
        for q1 in quantifiers:
            for q2 in quantifiers:
                for qc in quantifiers:
                    candidate = (
                        (q1, subject, middle),
                        (q2, middle, predicate),
                        (qc, subject, predicate),
                    )
                    pools[self._is_valid_syllogism(*candidate)].append(candidate)

        # Fall back to the other kind when the requested pool is empty
        is_valid = want_valid if pools[want_valid] else not want_valid
        premise1, premise2, conclusion = rng.choice(pools[is_valid])

        # Format the syllogism as text
        premise1_text = self._format_quantifier_statement(premise1[0], premise1[1], premise1[2])
        premise2_text = self._format_quantifier_statement(premise2[0], premise2[1], premise2[2])
        conclusion_text = self._format_quantifier_statement(conclusion[0], conclusion[1], conclusion[2])

        question = (
            f"Consider these statements:\n"
            f"1. {premise1_text}\n"
            f"2. {premise2_text}\n\n"
            f"Does it logically follow that:\n"
            f"{conclusion_text}?\n"
            f"(Answer Yes or No)"
        )

        return {
            "question": question,
            "answer": "Yes" if is_valid else "No",
            "metadata": {
                "premise1": premise1_text,
                "premise2": premise2_text,
                "conclusion": conclusion_text,
                "is_valid": is_valid,
            },
        }

    def __getitem__(self, idx: int) -> dict:
        """Generate a single syllogism task

        The RNG is seeded with ``self.seed + idx`` so a given index always yields
        the same item.
        """
        # NOTE(review): requires self.seed to be an int; presumably the base class
        # resolves a None config seed — confirm against ProceduralDataset.
        rng = Random(self.seed + idx)
        return self._generate_syllogism(rng)


# Register the dataset class together with its config class under the name "syllogism".
# NOTE(review): presumably this makes the dataset constructible by name through the
# factory module — confirm against ..factory.
register_dataset("syllogism", SyllogismDataset, SyllogismConfig)