mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-22 16:49:06 +00:00
formatting, cleanup
This commit is contained in:
parent
b767e58e48
commit
3dc80be7d2
12 changed files with 189 additions and 376 deletions
|
|
@ -2,7 +2,7 @@
|
||||||
Reasoning Gym - A library of procedural dataset generators for training reasoning models
|
Reasoning Gym - A library of procedural dataset generators for training reasoning models
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from . import algorithmic, algebra, arithmetic, cognition, data, games, graphs, logic
|
from . import algebra, algorithmic, arithmetic, cognition, data, games, graphs, logic
|
||||||
|
|
||||||
__version__ = "0.1.1"
|
__version__ = "0.1.1"
|
||||||
__all__ = ["arithmetic", "algorithmic", "algebra", "cognition", "data", "games", "graphs", "logic"]
|
__all__ = ["arithmetic", "algorithmic", "algebra", "cognition", "data", "games", "graphs", "logic"]
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,3 @@
|
||||||
from .simple_equations import SimpleEquationsDataset, SimpleEquationsConfig, simple_equations_dataset
|
from .simple_equations import SimpleEquationsConfig, SimpleEquationsDataset, simple_equations_dataset
|
||||||
|
|
||||||
__all__ = ["SimpleEquationsDataset", "SimpleEquationsConfig", "simple_equations_dataset"]
|
__all__ = ["SimpleEquationsDataset", "SimpleEquationsConfig", "simple_equations_dataset"]
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,10 @@
|
||||||
import random
|
import random
|
||||||
|
import string
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
import string
|
|
||||||
|
|
||||||
import sympy
|
import sympy
|
||||||
from sympy import Symbol, solve, Eq
|
from sympy import Eq, Symbol, solve
|
||||||
|
|
||||||
from ..dataset import ProceduralDataset
|
from ..dataset import ProceduralDataset
|
||||||
|
|
||||||
|
|
@ -12,11 +12,12 @@ from ..dataset import ProceduralDataset
|
||||||
@dataclass
|
@dataclass
|
||||||
class SimpleEquationsConfig:
|
class SimpleEquationsConfig:
|
||||||
"""Configuration for simple equation task generation"""
|
"""Configuration for simple equation task generation"""
|
||||||
|
|
||||||
min_terms: int = 2 # Minimum number of terms in expression
|
min_terms: int = 2 # Minimum number of terms in expression
|
||||||
max_terms: int = 4 # Maximum number of terms
|
max_terms: int = 4 # Maximum number of terms
|
||||||
min_value: int = 1 # Minimum value for constants
|
min_value: int = 1 # Minimum value for constants
|
||||||
max_value: int = 100 # Maximum value for constants
|
max_value: int = 100 # Maximum value for constants
|
||||||
operators: tuple = ('+', '-', '*') # Allowed operators
|
operators: tuple = ("+", "-", "*") # Allowed operators
|
||||||
seed: Optional[int] = None
|
seed: Optional[int] = None
|
||||||
size: int = 500
|
size: int = 500
|
||||||
|
|
||||||
|
|
@ -63,7 +64,7 @@ class SimpleEquationsDataset(ProceduralDataset):
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"equation": equation,
|
"equation": equation,
|
||||||
"variable": variable,
|
"variable": variable,
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_variable(self, rng: random.Random) -> str:
|
def _get_variable(self, rng: random.Random) -> str:
|
||||||
|
|
@ -100,9 +101,9 @@ class SimpleEquationsDataset(ProceduralDataset):
|
||||||
expr = terms[0]
|
expr = terms[0]
|
||||||
for i in range(1, num_terms):
|
for i in range(1, num_terms):
|
||||||
op = rng.choice(self.config.operators)
|
op = rng.choice(self.config.operators)
|
||||||
if op == '+':
|
if op == "+":
|
||||||
expr = expr + terms[i]
|
expr = expr + terms[i]
|
||||||
elif op == '-':
|
elif op == "-":
|
||||||
expr = expr - terms[i]
|
expr = expr - terms[i]
|
||||||
else: # '*'
|
else: # '*'
|
||||||
expr = expr * terms[i]
|
expr = expr * terms[i]
|
||||||
|
|
@ -134,7 +135,7 @@ def simple_equations_dataset(
|
||||||
max_terms: int = 5,
|
max_terms: int = 5,
|
||||||
min_value: int = 1,
|
min_value: int = 1,
|
||||||
max_value: int = 100,
|
max_value: int = 100,
|
||||||
operators: tuple = ('+', '-', '*'),
|
operators: tuple = ("+", "-", "*"),
|
||||||
seed: Optional[int] = None,
|
seed: Optional[int] = None,
|
||||||
size: int = 500,
|
size: int = 500,
|
||||||
) -> SimpleEquationsDataset:
|
) -> SimpleEquationsDataset:
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from .simple_equations import simple_equations_dataset
|
from .simple_equations import simple_equations_dataset
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -24,14 +25,7 @@ def test_simple_equations_generation():
|
||||||
question = item["question"]
|
question = item["question"]
|
||||||
assert variable in question
|
assert variable in question
|
||||||
assert equation in question
|
assert equation in question
|
||||||
assert any(
|
assert any(prompt in question for prompt in ["Find the value of", "Solve for", "Determine the value of"])
|
||||||
prompt in question
|
|
||||||
for prompt in [
|
|
||||||
"Find the value of",
|
|
||||||
"Solve for",
|
|
||||||
"Determine the value of"
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_simple_equations_config():
|
def test_simple_equations_config():
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,10 @@ from dataclasses import dataclass
|
||||||
from random import Random
|
from random import Random
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from ..dataset import ProceduralDataset
|
|
||||||
|
|
||||||
from reasoning_gym.data import read_data_file
|
from reasoning_gym.data import read_data_file
|
||||||
|
|
||||||
|
from ..dataset import ProceduralDataset
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class LetterCountingConfig:
|
class LetterCountingConfig:
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
import random
|
import random
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, Dict, List, Set, Tuple
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from itertools import count
|
from itertools import count
|
||||||
|
from typing import Dict, List, Optional, Set, Tuple
|
||||||
|
|
||||||
from ..dataset import ProceduralDataset
|
from ..dataset import ProceduralDataset
|
||||||
|
|
||||||
|
|
@ -30,9 +30,9 @@ class Person:
|
||||||
name: str
|
name: str
|
||||||
gender: Gender
|
gender: Gender
|
||||||
id: int
|
id: int
|
||||||
spouse: Optional['Person'] = None
|
spouse: Optional["Person"] = None
|
||||||
parents: List['Person'] = None
|
parents: List["Person"] = None
|
||||||
children: List['Person'] = None
|
children: List["Person"] = None
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
self.parents = self.parents or []
|
self.parents = self.parents or []
|
||||||
|
|
@ -46,13 +46,13 @@ class Person:
|
||||||
return False
|
return False
|
||||||
return self.id == other.id
|
return self.id == other.id
|
||||||
|
|
||||||
def add_child(self, child: 'Person'):
|
def add_child(self, child: "Person"):
|
||||||
if child not in self.children:
|
if child not in self.children:
|
||||||
self.children.append(child)
|
self.children.append(child)
|
||||||
if self not in child.parents:
|
if self not in child.parents:
|
||||||
child.parents.append(self)
|
child.parents.append(self)
|
||||||
|
|
||||||
def add_spouse(self, spouse: 'Person'):
|
def add_spouse(self, spouse: "Person"):
|
||||||
self.spouse = spouse
|
self.spouse = spouse
|
||||||
spouse.spouse = self
|
spouse.spouse = self
|
||||||
|
|
||||||
|
|
@ -60,6 +60,7 @@ class Person:
|
||||||
@dataclass
|
@dataclass
|
||||||
class FamilyRelationshipsConfig:
|
class FamilyRelationshipsConfig:
|
||||||
"""Configuration for family relationship task generation"""
|
"""Configuration for family relationship task generation"""
|
||||||
|
|
||||||
min_family_size: int = 4
|
min_family_size: int = 4
|
||||||
max_family_size: int = 8
|
max_family_size: int = 8
|
||||||
male_names: List[str] = None
|
male_names: List[str] = None
|
||||||
|
|
@ -70,20 +71,94 @@ class FamilyRelationshipsConfig:
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
# Default name lists if none provided
|
# Default name lists if none provided
|
||||||
default_male_names = [
|
default_male_names = [
|
||||||
"James", "John", "Robert", "Michael", "William", "David", "Richard",
|
"James",
|
||||||
"Joseph", "Thomas", "Charles", "Peter", "Daniel", "Matthew",
|
"John",
|
||||||
"Christopher", "Andrew", "George", "Edward", "Benjamin", "Henry",
|
"Robert",
|
||||||
"Samuel", "Alexander", "Oliver", "Jack", "Harry", "Jacob",
|
"Michael",
|
||||||
"Noah", "Ethan", "Lucas", "Mason", "Logan", "Sebastian", "Theodore", "Owen",
|
"William",
|
||||||
"Liam", "Aiden", "Kai", "Jayden", "Zion", "Phoenix", "Atlas", "Axel", "Ryder", "Finn"
|
"David",
|
||||||
|
"Richard",
|
||||||
|
"Joseph",
|
||||||
|
"Thomas",
|
||||||
|
"Charles",
|
||||||
|
"Peter",
|
||||||
|
"Daniel",
|
||||||
|
"Matthew",
|
||||||
|
"Christopher",
|
||||||
|
"Andrew",
|
||||||
|
"George",
|
||||||
|
"Edward",
|
||||||
|
"Benjamin",
|
||||||
|
"Henry",
|
||||||
|
"Samuel",
|
||||||
|
"Alexander",
|
||||||
|
"Oliver",
|
||||||
|
"Jack",
|
||||||
|
"Harry",
|
||||||
|
"Jacob",
|
||||||
|
"Noah",
|
||||||
|
"Ethan",
|
||||||
|
"Lucas",
|
||||||
|
"Mason",
|
||||||
|
"Logan",
|
||||||
|
"Sebastian",
|
||||||
|
"Theodore",
|
||||||
|
"Owen",
|
||||||
|
"Liam",
|
||||||
|
"Aiden",
|
||||||
|
"Kai",
|
||||||
|
"Jayden",
|
||||||
|
"Zion",
|
||||||
|
"Phoenix",
|
||||||
|
"Atlas",
|
||||||
|
"Axel",
|
||||||
|
"Ryder",
|
||||||
|
"Finn",
|
||||||
]
|
]
|
||||||
default_female_names = [
|
default_female_names = [
|
||||||
"Mary", "Patricia", "Jennifer", "Linda", "Elizabeth", "Barbara", "Susan",
|
"Mary",
|
||||||
"Jessica", "Sarah", "Karen", "Emma", "Lisa", "Anna",
|
"Patricia",
|
||||||
"Margaret", "Victoria", "Charlotte", "Sophia", "Isabella", "Olivia",
|
"Jennifer",
|
||||||
"Ava", "Mia", "Emily", "Abigail", "Amelia", "Eleanor", "Grace",
|
"Linda",
|
||||||
"Alice", "Lucy", "Chloe", "Sophie", "Lily", "Hannah", "Zoe",
|
"Elizabeth",
|
||||||
"Luna", "Nova", "Aria", "Willow", "Aurora", "Sage", "River", "Winter", "Sky", "Rain"
|
"Barbara",
|
||||||
|
"Susan",
|
||||||
|
"Jessica",
|
||||||
|
"Sarah",
|
||||||
|
"Karen",
|
||||||
|
"Emma",
|
||||||
|
"Lisa",
|
||||||
|
"Anna",
|
||||||
|
"Margaret",
|
||||||
|
"Victoria",
|
||||||
|
"Charlotte",
|
||||||
|
"Sophia",
|
||||||
|
"Isabella",
|
||||||
|
"Olivia",
|
||||||
|
"Ava",
|
||||||
|
"Mia",
|
||||||
|
"Emily",
|
||||||
|
"Abigail",
|
||||||
|
"Amelia",
|
||||||
|
"Eleanor",
|
||||||
|
"Grace",
|
||||||
|
"Alice",
|
||||||
|
"Lucy",
|
||||||
|
"Chloe",
|
||||||
|
"Sophie",
|
||||||
|
"Lily",
|
||||||
|
"Hannah",
|
||||||
|
"Zoe",
|
||||||
|
"Luna",
|
||||||
|
"Nova",
|
||||||
|
"Aria",
|
||||||
|
"Willow",
|
||||||
|
"Aurora",
|
||||||
|
"Sage",
|
||||||
|
"River",
|
||||||
|
"Winter",
|
||||||
|
"Sky",
|
||||||
|
"Rain",
|
||||||
]
|
]
|
||||||
|
|
||||||
if self.male_names is None:
|
if self.male_names is None:
|
||||||
|
|
@ -125,10 +200,7 @@ class FamilyRelationshipsDataset(ProceduralDataset):
|
||||||
story = self._generate_story(family)
|
story = self._generate_story(family)
|
||||||
|
|
||||||
# Format question
|
# Format question
|
||||||
question = rng.choice(self._templates).format(
|
question = rng.choice(self._templates).format(person1=person1.name, person2=person2.name)
|
||||||
person1=person1.name,
|
|
||||||
person2=person2.name
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"question": f"{story}\n\n{question}",
|
"question": f"{story}\n\n{question}",
|
||||||
|
|
@ -137,8 +209,8 @@ class FamilyRelationshipsDataset(ProceduralDataset):
|
||||||
"person1": person1.name,
|
"person1": person1.name,
|
||||||
"person2": person2.name,
|
"person2": person2.name,
|
||||||
"relationship": relationship.value,
|
"relationship": relationship.value,
|
||||||
"family_size": len(family)
|
"family_size": len(family),
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _generate_family(self, rng: random.Random) -> Set[Person]:
|
def _generate_family(self, rng: random.Random) -> Set[Person]:
|
||||||
|
|
@ -148,8 +220,7 @@ class FamilyRelationshipsDataset(ProceduralDataset):
|
||||||
used_names = set()
|
used_names = set()
|
||||||
|
|
||||||
def get_name(gender: Gender) -> str:
|
def get_name(gender: Gender) -> str:
|
||||||
names = (self.config.male_names if gender == Gender.MALE
|
names = self.config.male_names if gender == Gender.MALE else self.config.female_names
|
||||||
else self.config.female_names)
|
|
||||||
available = [n for n in names if n not in used_names]
|
available = [n for n in names if n not in used_names]
|
||||||
if not available:
|
if not available:
|
||||||
return None
|
return None
|
||||||
|
|
@ -195,21 +266,15 @@ class FamilyRelationshipsDataset(ProceduralDataset):
|
||||||
|
|
||||||
# Determine relationship
|
# Determine relationship
|
||||||
if person1 in person2.parents:
|
if person1 in person2.parents:
|
||||||
relationship = (Relationship.MOTHER if person1.gender == Gender.FEMALE
|
relationship = Relationship.MOTHER if person1.gender == Gender.FEMALE else Relationship.FATHER
|
||||||
else Relationship.FATHER)
|
|
||||||
elif person2 in person1.parents:
|
elif person2 in person1.parents:
|
||||||
relationship = (Relationship.DAUGHTER if person1.gender == Gender.FEMALE
|
relationship = Relationship.DAUGHTER if person1.gender == Gender.FEMALE else Relationship.SON
|
||||||
else Relationship.SON)
|
|
||||||
elif person1.spouse == person2:
|
elif person1.spouse == person2:
|
||||||
relationship = (Relationship.WIFE if person1.gender == Gender.FEMALE
|
relationship = Relationship.WIFE if person1.gender == Gender.FEMALE else Relationship.HUSBAND
|
||||||
else Relationship.HUSBAND)
|
elif person1.parents and person2.parents and set(person1.parents) == set(person2.parents):
|
||||||
elif (person1.parents and person2.parents and
|
relationship = Relationship.SISTER if person1.gender == Gender.FEMALE else Relationship.BROTHER
|
||||||
set(person1.parents) == set(person2.parents)):
|
elif person1 in [p for parent in person2.parents for p in parent.parents]:
|
||||||
relationship = (Relationship.SISTER if person1.gender == Gender.FEMALE
|
relationship = Relationship.GRANDMOTHER if person1.gender == Gender.FEMALE else Relationship.GRANDFATHER
|
||||||
else Relationship.BROTHER)
|
|
||||||
elif (person1 in [p for parent in person2.parents for p in parent.parents]):
|
|
||||||
relationship = (Relationship.GRANDMOTHER if person1.gender == Gender.FEMALE
|
|
||||||
else Relationship.GRANDFATHER)
|
|
||||||
else:
|
else:
|
||||||
# Try again with different people
|
# Try again with different people
|
||||||
return self._get_relationship_question(rng, family)
|
return self._get_relationship_question(rng, family)
|
||||||
|
|
@ -238,15 +303,11 @@ class FamilyRelationshipsDataset(ProceduralDataset):
|
||||||
described_children.update(children) # Mark these children as described
|
described_children.update(children) # Mark these children as described
|
||||||
|
|
||||||
if len(children_names) == 1:
|
if len(children_names) == 1:
|
||||||
story_parts.append(
|
story_parts.append(f"They have a child called {children_names[0]}.")
|
||||||
f"They have a child called {children_names[0]}."
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
*first, last = children_names
|
*first, last = children_names
|
||||||
children_str = ", ".join(first) + f" and {last}"
|
children_str = ", ".join(first) + f" and {last}"
|
||||||
story_parts.append(
|
story_parts.append(f"They have children called {children_str}.")
|
||||||
f"They have children called {children_str}."
|
|
||||||
)
|
|
||||||
|
|
||||||
return " ".join(story_parts)
|
return " ".join(story_parts)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,242 +0,0 @@
|
||||||
import random
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Optional, Dict, List, Set, Tuple
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
from ..dataset import ProceduralDataset
|
|
||||||
|
|
||||||
|
|
||||||
class Gender(Enum):
|
|
||||||
MALE = "male"
|
|
||||||
FEMALE = "female"
|
|
||||||
|
|
||||||
|
|
||||||
class Relationship(Enum):
|
|
||||||
MOTHER = "Mother"
|
|
||||||
FATHER = "Father"
|
|
||||||
SISTER = "Sister"
|
|
||||||
BROTHER = "Brother"
|
|
||||||
DAUGHTER = "Daughter"
|
|
||||||
SON = "Son"
|
|
||||||
WIFE = "Wife"
|
|
||||||
HUSBAND = "Husband"
|
|
||||||
GRANDMOTHER = "Grandmother"
|
|
||||||
GRANDFATHER = "Grandfather"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Person:
|
|
||||||
name: str
|
|
||||||
gender: Gender
|
|
||||||
spouse: Optional['Person'] = None
|
|
||||||
parents: List['Person'] = None
|
|
||||||
children: List['Person'] = None
|
|
||||||
|
|
||||||
def __post_init__(self):
|
|
||||||
self.parents = self.parents or []
|
|
||||||
self.children = self.children or []
|
|
||||||
|
|
||||||
def add_child(self, child: 'Person'):
|
|
||||||
if child not in self.children:
|
|
||||||
self.children.append(child)
|
|
||||||
if self not in child.parents:
|
|
||||||
child.parents.append(self)
|
|
||||||
|
|
||||||
def add_spouse(self, spouse: 'Person'):
|
|
||||||
self.spouse = spouse
|
|
||||||
spouse.spouse = self
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class FamilyRelationshipsConfig:
|
|
||||||
"""Configuration for family relationship task generation"""
|
|
||||||
min_family_size: int = 4
|
|
||||||
max_family_size: int = 8
|
|
||||||
male_names: List[str] = None
|
|
||||||
female_names: List[str] = None
|
|
||||||
seed: Optional[int] = None
|
|
||||||
size: int = 500
|
|
||||||
|
|
||||||
def __post_init__(self):
|
|
||||||
# Default name lists if none provided
|
|
||||||
self.male_names = self.male_names or [
|
|
||||||
"James", "John", "Robert", "Michael", "William", "David", "Richard",
|
|
||||||
"Joseph", "Thomas", "Charles", "Peter", "Daniel", "Matthew"
|
|
||||||
]
|
|
||||||
self.female_names = self.female_names or [
|
|
||||||
"Mary", "Patricia", "Jennifer", "Linda", "Elizabeth", "Barbara", "Susan",
|
|
||||||
"Jessica", "Sarah", "Karen", "Emma", "Lisa", "Anna"
|
|
||||||
]
|
|
||||||
|
|
||||||
def validate(self):
|
|
||||||
"""Validate configuration parameters"""
|
|
||||||
assert self.min_family_size >= 3, "min_family_size must be at least 3"
|
|
||||||
assert self.max_family_size >= self.min_family_size, "max_family_size must be >= min_family_size"
|
|
||||||
assert len(self.male_names) > 0, "must provide male names"
|
|
||||||
assert len(self.female_names) > 0, "must provide female names"
|
|
||||||
|
|
||||||
|
|
||||||
class FamilyRelationshipsDataset(ProceduralDataset):
|
|
||||||
"""Generates family relationship reasoning tasks"""
|
|
||||||
|
|
||||||
def __init__(self, config: FamilyRelationshipsConfig):
|
|
||||||
self.config = config
|
|
||||||
self.config.validate()
|
|
||||||
self._templates = [
|
|
||||||
"What is {person1} to {person2}?",
|
|
||||||
"How is {person1} related to {person2}?",
|
|
||||||
"What relation is {person1} to {person2}?",
|
|
||||||
]
|
|
||||||
super().__init__(seed=config.seed, size=config.size)
|
|
||||||
|
|
||||||
def __getitem__(self, idx: int) -> dict:
|
|
||||||
rng = random.Random(self.seed + idx)
|
|
||||||
|
|
||||||
# Generate family tree
|
|
||||||
family = self._generate_family(rng)
|
|
||||||
|
|
||||||
# Select two people and their relationship
|
|
||||||
person1, person2, relationship = self._get_relationship_question(rng, family)
|
|
||||||
|
|
||||||
# Generate story describing the family relationships
|
|
||||||
story = self._generate_story(family)
|
|
||||||
|
|
||||||
# Format question
|
|
||||||
question = rng.choice(self._templates).format(
|
|
||||||
person1=person1.name,
|
|
||||||
person2=person2.name
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"question": f"{story}\n\n{question}",
|
|
||||||
"answer": relationship.value,
|
|
||||||
"metadata": {
|
|
||||||
"person1": person1.name,
|
|
||||||
"person2": person2.name,
|
|
||||||
"relationship": relationship.value,
|
|
||||||
"family_size": len(family)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _generate_family(self, rng: random.Random) -> Set[Person]:
|
|
||||||
"""Generate a random family tree"""
|
|
||||||
family_size = rng.randint(self.config.min_family_size, self.config.max_family_size)
|
|
||||||
family = set()
|
|
||||||
used_names = set()
|
|
||||||
|
|
||||||
def get_name(gender: Gender) -> str:
|
|
||||||
names = (self.config.male_names if gender == Gender.MALE
|
|
||||||
else self.config.female_names)
|
|
||||||
available = [n for n in names if n not in used_names]
|
|
||||||
if not available:
|
|
||||||
return None
|
|
||||||
name = rng.choice(available)
|
|
||||||
used_names.add(name)
|
|
||||||
return name
|
|
||||||
|
|
||||||
# Create grandparents generation
|
|
||||||
grandfather = Person(get_name(Gender.MALE), Gender.MALE)
|
|
||||||
grandmother = Person(get_name(Gender.FEMALE), Gender.FEMALE)
|
|
||||||
grandfather.add_spouse(grandmother)
|
|
||||||
family.update([grandfather, grandmother])
|
|
||||||
|
|
||||||
# Create parents
|
|
||||||
father = Person(get_name(Gender.MALE), Gender.MALE)
|
|
||||||
mother = Person(get_name(Gender.FEMALE), Gender.FEMALE)
|
|
||||||
father.add_spouse(mother)
|
|
||||||
grandfather.add_child(father)
|
|
||||||
grandmother.add_child(father)
|
|
||||||
family.update([father, mother])
|
|
||||||
|
|
||||||
# Add children
|
|
||||||
while len(family) < family_size:
|
|
||||||
gender = rng.choice([Gender.MALE, Gender.FEMALE])
|
|
||||||
name = get_name(gender)
|
|
||||||
if not name:
|
|
||||||
break
|
|
||||||
child = Person(name, gender)
|
|
||||||
father.add_child(child)
|
|
||||||
mother.add_child(child)
|
|
||||||
family.add(child)
|
|
||||||
|
|
||||||
return family
|
|
||||||
|
|
||||||
def _get_relationship_question(
|
|
||||||
self, rng: random.Random, family: Set[Person]
|
|
||||||
) -> Tuple[Person, Person, Relationship]:
|
|
||||||
"""Select two family members and determine their relationship"""
|
|
||||||
person1, person2 = rng.sample(list(family), 2)
|
|
||||||
|
|
||||||
# Determine relationship
|
|
||||||
if person1 in person2.parents:
|
|
||||||
relationship = (Relationship.MOTHER if person1.gender == Gender.FEMALE
|
|
||||||
else Relationship.FATHER)
|
|
||||||
elif person2 in person1.parents:
|
|
||||||
relationship = (Relationship.DAUGHTER if person1.gender == Gender.FEMALE
|
|
||||||
else Relationship.SON)
|
|
||||||
elif person1.spouse == person2:
|
|
||||||
relationship = (Relationship.WIFE if person1.gender == Gender.FEMALE
|
|
||||||
else Relationship.HUSBAND)
|
|
||||||
elif (person1.parents and person2.parents and
|
|
||||||
set(person1.parents) == set(person2.parents)):
|
|
||||||
relationship = (Relationship.SISTER if person1.gender == Gender.FEMALE
|
|
||||||
else Relationship.BROTHER)
|
|
||||||
elif (person1 in [p for parent in person2.parents for p in parent.parents]):
|
|
||||||
relationship = (Relationship.GRANDMOTHER if person1.gender == Gender.FEMALE
|
|
||||||
else Relationship.GRANDFATHER)
|
|
||||||
else:
|
|
||||||
# Try again with different people
|
|
||||||
return self._get_relationship_question(rng, family)
|
|
||||||
|
|
||||||
return person1, person2, relationship
|
|
||||||
|
|
||||||
def _generate_story(self, family: Set[Person]) -> str:
|
|
||||||
"""Generate a story describing the family relationships"""
|
|
||||||
story_parts = []
|
|
||||||
|
|
||||||
# Find married couples
|
|
||||||
couples = set()
|
|
||||||
for person in family:
|
|
||||||
if person.spouse and (person.spouse, person) not in couples:
|
|
||||||
couples.add((person, person.spouse))
|
|
||||||
|
|
||||||
# Describe marriages
|
|
||||||
for person1, person2 in couples:
|
|
||||||
story_parts.append(f"{person1.name} is married to {person2.name}.")
|
|
||||||
|
|
||||||
# Describe parent-child relationships
|
|
||||||
for person in family:
|
|
||||||
if person.children:
|
|
||||||
children_names = [c.name for c in person.children]
|
|
||||||
if len(children_names) == 1:
|
|
||||||
story_parts.append(
|
|
||||||
f"They have a child called {children_names[0]}."
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
*first, last = children_names
|
|
||||||
children_str = ", ".join(first) + f" and {last}"
|
|
||||||
story_parts.append(
|
|
||||||
f"They have children called {children_str}."
|
|
||||||
)
|
|
||||||
|
|
||||||
return " ".join(story_parts)
|
|
||||||
|
|
||||||
|
|
||||||
def family_relationships_dataset(
|
|
||||||
min_family_size: int = 4,
|
|
||||||
max_family_size: int = 8,
|
|
||||||
male_names: List[str] = None,
|
|
||||||
female_names: List[str] = None,
|
|
||||||
seed: Optional[int] = None,
|
|
||||||
size: int = 500,
|
|
||||||
) -> FamilyRelationshipsDataset:
|
|
||||||
"""Create a FamilyRelationshipsDataset with the given configuration"""
|
|
||||||
config = FamilyRelationshipsConfig(
|
|
||||||
min_family_size=min_family_size,
|
|
||||||
max_family_size=max_family_size,
|
|
||||||
male_names=male_names,
|
|
||||||
female_names=female_names,
|
|
||||||
seed=seed,
|
|
||||||
size=size,
|
|
||||||
)
|
|
||||||
return FamilyRelationshipsDataset(config)
|
|
||||||
|
|
@ -1,8 +1,6 @@
|
||||||
from reasoning_gym.graphs.family_relationships import (
|
import pytest
|
||||||
family_relationships_dataset,
|
|
||||||
Gender,
|
from reasoning_gym.graphs.family_relationships import Gender, Relationship, family_relationships_dataset
|
||||||
Relationship,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_family_relationships_generation():
|
def test_family_relationships_generation():
|
||||||
|
|
@ -18,14 +16,7 @@ def test_family_relationships_generation():
|
||||||
story_and_question = item["question"]
|
story_and_question = item["question"]
|
||||||
assert "is married to" in story_and_question
|
assert "is married to" in story_and_question
|
||||||
assert "have" in story_and_question
|
assert "have" in story_and_question
|
||||||
assert any(
|
assert any(prompt in story_and_question for prompt in ["What is", "How is", "What relation is"])
|
||||||
prompt in story_and_question
|
|
||||||
for prompt in [
|
|
||||||
"What is",
|
|
||||||
"How is",
|
|
||||||
"What relation is"
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Validate answer is a valid relationship
|
# Validate answer is a valid relationship
|
||||||
assert item["answer"] in [r.value for r in Relationship]
|
assert item["answer"] in [r.value for r in Relationship]
|
||||||
|
|
@ -68,11 +59,19 @@ def test_relationship_consistency():
|
||||||
for item in dataset:
|
for item in dataset:
|
||||||
# Check that relationship matches the gender
|
# Check that relationship matches the gender
|
||||||
relationship = item["metadata"]["relationship"]
|
relationship = item["metadata"]["relationship"]
|
||||||
if relationship in [Relationship.MOTHER.value, Relationship.GRANDMOTHER.value,
|
if relationship in [
|
||||||
Relationship.WIFE.value, Relationship.SISTER.value,
|
Relationship.MOTHER.value,
|
||||||
Relationship.DAUGHTER.value]:
|
Relationship.GRANDMOTHER.value,
|
||||||
|
Relationship.WIFE.value,
|
||||||
|
Relationship.SISTER.value,
|
||||||
|
Relationship.DAUGHTER.value,
|
||||||
|
]:
|
||||||
assert "married to" in item["question"] or "child" in item["question"]
|
assert "married to" in item["question"] or "child" in item["question"]
|
||||||
elif relationship in [Relationship.FATHER.value, Relationship.GRANDFATHER.value,
|
elif relationship in [
|
||||||
Relationship.HUSBAND.value, Relationship.BROTHER.value,
|
Relationship.FATHER.value,
|
||||||
Relationship.SON.value]:
|
Relationship.GRANDFATHER.value,
|
||||||
|
Relationship.HUSBAND.value,
|
||||||
|
Relationship.BROTHER.value,
|
||||||
|
Relationship.SON.value,
|
||||||
|
]:
|
||||||
assert "married to" in item["question"] or "child" in item["question"]
|
assert "married to" in item["question"] or "child" in item["question"]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from reasoning_gym.cognition.number_sequences import Operation, PatternRule, NumberSequenceConfig, NumberSequenceDataset
|
from reasoning_gym.cognition.number_sequences import NumberSequenceConfig, NumberSequenceDataset, Operation, PatternRule
|
||||||
|
|
||||||
|
|
||||||
def test_sequence_config_validation():
|
def test_sequence_config_validation():
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue