reasoning-gym/tests/test_syllogisms.py
Andreas Köpf 0c8752c7b1
Fix syllogisms (#82)
* let o1 write a new is_valid_syllogism() check
* extend unit test
* update gallery
2025-02-07 21:47:59 +01:00

274 lines
9.1 KiB
Python

"""Tests for syllogism task generation"""
import pytest
from reasoning_gym.logic.syllogisms import Quantifier, SyllogismConfig, SyllogismDataset, Term
def test_syllogism_config_validation():
    """Each rejected configuration must fail with an AssertionError."""
    rejected_settings = (
        # Every quantifier switched off: no quantifiers allowed
        {
            "allow_all": False,
            "allow_no": False,
            "allow_some": False,
            "allow_some_not": False,
        },
        # Negative invalid_ratio
        {"invalid_ratio": -0.1},
        # invalid_ratio greater than 1
        {"invalid_ratio": 1.1},
    )

    for settings in rejected_settings:
        # Construction is kept inside the raises-context so that an
        # AssertionError from either the constructor or validate() counts.
        with pytest.raises(AssertionError):
            config = SyllogismConfig(**settings)
            config.validate()
def test_syllogism_dataset_deterministic():
    """Two datasets built from the same seeded config agree item by item."""
    shared_config = SyllogismConfig(seed=42, size=10)
    first = SyllogismDataset(shared_config)
    second = SyllogismDataset(shared_config)

    item_count = len(first)
    for position in range(item_count):
        # Compare by index so the __getitem__ path is what gets exercised.
        left, right = first[position], second[position]
        assert left == right
def test_syllogism_dataset_items():
    """Generated items have the expected keys, answer values and question text."""
    dataset = SyllogismDataset(SyllogismConfig(size=10, seed=42))

    top_level_keys = ("question", "answer", "metadata")
    metadata_keys = ("premise1", "premise2", "conclusion", "is_valid")
    question_fragments = (
        "Consider these statements:",
        "1.",
        "2.",
        "Does it logically follow that:",
    )

    for index in range(len(dataset)):
        entry = dataset[index]

        # Item structure
        assert isinstance(entry, dict)
        for key in top_level_keys:
            assert key in entry

        # Metadata contents
        metadata = entry["metadata"]
        for key in metadata_keys:
            assert key in metadata

        # Answer format
        assert entry["answer"] in ("Yes", "No")

        # Question format
        question = entry["question"]
        for fragment in question_fragments:
            assert fragment in question
def test_valid_syllogism_forms():
    """Check _is_valid_syllogism on known valid and known invalid syllogisms.

    Every case is a row of (label, premise 1, premise 2, conclusion), where each
    statement is a (Quantifier, subject Term, predicate Term) tuple passed
    unchanged to ``dataset._is_valid_syllogism``. Rows in ``valid_forms`` must
    be accepted; rows in ``invalid_forms`` must be rejected. The label is used
    only in the assertion message so a failure names the offending form.
    """
    dataset = SyllogismDataset(SyllogismConfig(size=1, seed=42))
    q = Quantifier

    # Term roles in the tables below: S = minor term, M = middle term, P = major term.
    minor = Term("mortal", "mortals")  # S
    middle = Term("human", "humans")  # M
    major = Term("animal", "animals")  # P

    valid_forms = [
        (
            "Barbara (AAA-1)",
            (q.ALL, middle, major),  # All M are P
            (q.ALL, minor, middle),  # All S are M
            (q.ALL, minor, major),  # All S are P
        ),
        (
            "Celarent (EAE-1)",
            (q.NO, middle, major),  # No M are P
            (q.ALL, minor, middle),  # All S are M
            (q.NO, minor, major),  # No S are P
        ),
        (
            "Cesare (EAE-2)",
            (q.NO, major, middle),  # No P are M
            (q.ALL, minor, middle),  # All S are M
            (q.NO, minor, major),  # No S are P
        ),
        (
            "Darii (AII-1)",
            (q.ALL, middle, major),  # All M are P
            (q.SOME, minor, middle),  # Some S are M
            (q.SOME, minor, major),  # Some S are P
        ),
        (
            "Disamis (IAI-3)",
            (q.SOME, middle, major),  # Some M are P
            (q.ALL, middle, minor),  # All M are S
            (q.SOME, minor, major),  # Some S are P
        ),
        (
            "Ferio (EIO-1)",
            (q.NO, middle, major),  # No M are P
            (q.SOME, minor, middle),  # Some S are M
            (q.SOME_NOT, minor, major),  # Some S are not P
        ),
        (
            "Festino (EIO-2)",
            (q.NO, major, middle),  # No P are M
            (q.SOME, minor, middle),  # Some S are M
            (q.SOME_NOT, minor, major),  # Some S are not P
        ),
        (
            "Datisi (AII-3)",
            (q.ALL, middle, major),  # All M are P
            (q.SOME, middle, minor),  # Some M are S
            (q.SOME, minor, major),  # Some S are P
        ),
        (
            "Bocardo (OAO-3)",
            (q.SOME_NOT, middle, major),  # Some M are not P
            (q.ALL, middle, minor),  # All M are S
            (q.SOME_NOT, minor, major),  # Some S are not P
        ),
        (
            "Baroco (AOO-2)",
            (q.ALL, major, middle),  # All P are M
            (q.SOME_NOT, minor, middle),  # Some S are not M
            (q.SOME_NOT, minor, major),  # Some S are not P
        ),
        (
            "Camestres (AEE-2)",
            (q.ALL, major, middle),  # All P are M
            (q.NO, minor, middle),  # No S are M
            (q.NO, minor, major),  # No S are P
        ),
        (
            "Dimaris (IAI-4)",
            (q.SOME, major, middle),  # Some P are M
            (q.ALL, middle, minor),  # All M are S
            (q.SOME, minor, major),  # Some S are P
        ),
        (
            "Ferison (EIO-3)",
            (q.NO, middle, major),  # No M are P
            (q.SOME, middle, minor),  # Some M are S
            (q.SOME_NOT, minor, major),  # Some S are not P
        ),
        (
            "Fresison (EIO-4)",
            (q.NO, major, middle),  # No P are M
            (q.SOME, middle, minor),  # Some M are S
            (q.SOME_NOT, minor, major),  # Some S are not P
        ),
        (
            "Camenes (AEE-4)",
            (q.ALL, major, middle),  # All P are M
            (q.NO, middle, minor),  # No M are S
            (q.NO, minor, major),  # No S are P
        ),
    ]

    for label, premise1, premise2, conclusion in valid_forms:
        assert dataset._is_valid_syllogism(
            premise1, premise2, conclusion
        ), f"{label} should be accepted as valid"

    # Separate terms for the concrete invalid examples.
    student = Term("student", "students")
    human = Term("human", "humans")
    chef = Term("chef", "chefs")
    child = Term("child", "children")
    animal = Term("animal", "animals")
    doctor = Term("doctor", "doctors")

    invalid_forms = [
        (
            "two particular premises",
            (q.SOME, middle, major),  # Some M are P
            (q.SOME, minor, middle),  # Some S are M
            (q.SOME, minor, major),  # Some S are P
        ),
        (
            "two negative premises",
            (q.NO, middle, major),  # No M are P
            (q.NO, minor, middle),  # No S are M
            (q.NO, minor, major),  # No S are P
        ),
        (
            "two negative premises (students/humans/chefs)",
            (q.NO, student, human),  # No students are humans
            (q.NO, human, chef),  # No humans are chefs
            (q.NO, student, chef),  # No students are chefs
        ),
        (
            "some-not premise with universal premise (children/animals/doctors)",
            (q.SOME_NOT, child, animal),  # Some children are not animals
            (q.ALL, animal, doctor),  # All animals are doctors
            (q.SOME_NOT, child, doctor),  # Some children are not doctors
        ),
    ]

    for label, premise1, premise2, conclusion in invalid_forms:
        assert not dataset._is_valid_syllogism(
            premise1, premise2, conclusion
        ), f"{label} should be rejected as invalid"
def test_syllogism_dataset_iteration():
    """Iteration yields config.size items, and a second pass yields the same items."""
    config = SyllogismConfig(size=5, seed=42)
    dataset = SyllogismDataset(config)

    # First pass: the number of yielded items must match the configured size.
    first_pass = list(dataset)
    assert len(first_pass) == config.size

    # Second pass: iterating again must reproduce the same items.
    second_pass = list(dataset)
    assert first_pass == second_pass