mirror of
https://github.com/open-thought/reasoning-gym.git
synced 2026-04-28 17:29:39 +00:00
New dataset categories: combinatorics, statistics, optimization, and formal languages. Extended existing algebra, arithmetic, probability, logic, and graphs packages with complex_advanced, linear_algebra, limits, number_theory, conditional_probability, set_operations, and job_scheduling. Each dataset includes config validation, deterministic seeding, custom scoring, curriculum support, and comprehensive unit tests (92 new tests).
158 lines
6.5 KiB
Python
158 lines
6.5 KiB
Python
import random
|
|
from dataclasses import dataclass, field
|
|
from fractions import Fraction
|
|
from typing import Any, Optional
|
|
|
|
from ..coaching import BaseCurriculum, RangeAttributeDefinition, ScalarAttributeDefinition
|
|
from ..factory import ProceduralDataset, register_dataset
|
|
|
|
# Registry key for this dataset; also written into each item's metadata as "source_dataset".
DATASET_NAME = "conditional_probability"
|
|
|
|
# All problem kinds the dataset can generate; config validation rejects any task type not listed here.
TASK_TYPES = ("bayes", "dependent_draws", "contingency_table")
|
|
|
|
|
|
@dataclass
class ConditionalProbabilityConfig:
    """Settings for the conditional-probability dataset.

    Call :meth:`validate` to check that the values can actually be used by
    the generators (non-empty ranges, usable sampling weights, ...).
    """

    # Problem kinds to sample from; each must be a member of TASK_TYPES.
    task_types: tuple[str, ...] = TASK_TYPES
    # Relative sampling weight for each entry of task_types (same length, same order).
    task_weights: list[float] = field(default_factory=lambda: [0.34, 0.33, 0.33])
    # Inclusive bounds on the number of balls in the bag for "dependent_draws" problems.
    min_total_items: int = 5
    max_total_items: int = 20
    # Inclusive bounds on each cell count for "contingency_table" problems.
    min_table_cell: int = 5
    max_table_cell: int = 50
    # Seed and number of items, handed to the dataset base class.
    seed: Optional[int] = None
    size: int = 500

    def validate(self) -> None:
        """Check that the configuration is usable.

        Raises:
            AssertionError: if any field is out of range or inconsistent.
                ``assert`` is kept (rather than raising ``ValueError``) so the
                exception type seen by existing callers does not change.
        """
        assert self.size > 0, "size must be positive"

        assert len(self.task_types) > 0, "must have at least one task type"
        unknown = [t for t in self.task_types if t not in TASK_TYPES]
        assert not unknown, f"invalid task type: {unknown}; expected one of {TASK_TYPES}"

        assert len(self.task_weights) == len(self.task_types), "weights must match types"
        # random.choices rejects a non-positive weight total, and negative
        # weights have no defined meaning there.
        assert all(w >= 0 for w in self.task_weights), "task_weights must be non-negative"
        assert sum(self.task_weights) > 0, "task_weights must sum to a positive value"

        # The draw generator needs at least 2 balls of the drawn colour and at
        # least 1 of the other colour, so a bag of fewer than 3 balls cannot
        # be generated (randint(2, total - 1) would get an empty range).
        assert self.min_total_items >= 3, "min_total_items must be >= 3"
        assert self.max_total_items >= self.min_total_items, "max_total_items must be >= min_total_items"

        # A zero cell could make the "Event A" row total zero, leaving
        # P(B | A) undefined; an inverted range would make randint fail.
        assert self.min_table_cell >= 1, "min_table_cell must be >= 1"
        assert self.max_table_cell >= self.min_table_cell, "max_table_cell must be >= min_table_cell"
|
|
|
|
|
|
class ConditionalProbabilityDataset(ProceduralDataset):
    """Procedurally generated conditional-probability word problems.

    Every item is one of three problem kinds: Bayes' theorem for a medical
    test, drawing balls without replacement, or reading a conditional
    probability off a 2x2 contingency table. Answers are exact fractions
    rendered with ``str(Fraction(...))``.
    """

    def __init__(self, config: ConditionalProbabilityConfig):
        """Pass the config together with its seed and size to the base class."""
        super().__init__(config=config, seed=config.seed, size=config.size)

    def _make_bayes(self, rng: random.Random) -> dict:
        """Build a "positive test => disease?" Bayes problem.

        Sensitivity and specificity are drawn from 70..99 percent and the
        prevalence from 1..15 percent, all kept as exact fractions.

        Returns:
            dict with keys ``question``, ``answer`` and ``task_type``.
        """
        sensitivity = Fraction(rng.randint(70, 99), 100)
        specificity = Fraction(rng.randint(70, 99), 100)
        prevalence = Fraction(rng.randint(1, 15), 100)

        # P(+) = P(+|D) P(D) + P(+|not D) P(not D); never zero because both
        # sensitivity and prevalence are strictly positive.
        p_pos = sensitivity * prevalence + (1 - specificity) * (1 - prevalence)
        p_disease_given_pos = (sensitivity * prevalence) / p_pos

        question = (
            f"A medical test has a sensitivity (true positive rate) of {sensitivity} "
            f"and a specificity (true negative rate) of {specificity}. "
            f"The prevalence of the disease in the population is {prevalence}. "
            f"If a person tests positive, what is the probability they actually have the disease? "
            f"Give your answer as a simplified fraction."
        )
        return {"question": question, "answer": str(p_disease_given_pos), "task_type": "bayes"}

    def _make_dependent_draws(self, rng: random.Random) -> dict:
        """Build an "all drawn balls share one colour" problem without replacement.

        The bag size comes from the config's ``min_total_items`` /
        ``max_total_items``; 2 or 3 balls are drawn.

        Returns:
            dict with keys ``question``, ``answer`` and ``task_type``.
        """
        total = rng.randint(self.config.min_total_items, self.config.max_total_items)
        # At least 2 of colour A (so 2 draws can succeed) and at least 1 of colour B.
        color_a_count = rng.randint(2, total - 1)
        color_b_count = total - color_a_count
        draws = rng.randint(2, min(3, color_a_count))

        color_a = rng.choice(["red", "blue", "green", "white", "black"])
        color_b = rng.choice([c for c in ["red", "blue", "green", "white", "black"] if c != color_a])

        # Multiply the successive conditional probabilities of drawing colour A.
        prob = Fraction(1, 1)
        for i in range(draws):
            prob *= Fraction(color_a_count - i, total - i)

        question = (
            f"A bag contains {color_a_count} {color_a} balls and {color_b_count} {color_b} balls. "
            f"You draw {draws} balls without replacement. "
            f"What is the probability that all {draws} balls are {color_a}? "
            f"Give your answer as a simplified fraction."
        )
        return {"question": question, "answer": str(prob), "task_type": "dependent_draws"}

    def _make_contingency(self, rng: random.Random) -> dict:
        """Build a 2x2 contingency-table problem asking for P(B | A).

        Each cell is drawn from the config's ``min_table_cell`` /
        ``max_table_cell`` range.

        Returns:
            dict with keys ``question``, ``answer`` and ``task_type``.
        """
        a = rng.randint(self.config.min_table_cell, self.config.max_table_cell)
        b = rng.randint(self.config.min_table_cell, self.config.max_table_cell)
        c = rng.randint(self.config.min_table_cell, self.config.max_table_cell)
        d = rng.randint(self.config.min_table_cell, self.config.max_table_cell)

        total = a + b + c + d
        row1_total = a + b
        # P(B | A) = count(A and B) / count(A).
        prob = Fraction(a, row1_total)

        question = (
            f"Consider the following contingency table:\n\n"
            f" | Event B | Not B | Total\n"
            f" Event A | {a:>4} | {b:>4} | {row1_total:>4}\n"
            f" Not A | {c:>4} | {d:>4} | {c + d:>4}\n"
            f" Total | {a + c:>4} | {b + d:>4} | {total:>4}\n\n"
            f"Given that Event A occurred, what is the probability of Event B? "
            f"Give your answer as a simplified fraction."
        )
        return {"question": question, "answer": str(prob), "task_type": "contingency_table"}

    def __getitem__(self, idx: int) -> dict:
        """Generate the item at position ``idx``.

        A fresh ``random.Random`` seeded with ``self.seed + idx`` is used, so
        the same index yields the same item for a given seed. ``self.seed``
        is presumably set by the base class -- confirm against
        ``ProceduralDataset``.

        Returns:
            dict with ``question``, ``answer`` and a ``metadata`` dict.
        """
        rng = random.Random(self.seed + idx)
        task_type = rng.choices(self.config.task_types, weights=self.config.task_weights, k=1)[0]

        generators = {
            "bayes": self._make_bayes,
            "dependent_draws": self._make_dependent_draws,
            "contingency_table": self._make_contingency,
        }
        result = generators[task_type](rng)

        return {
            "question": result["question"],
            "answer": result["answer"],
            "metadata": {
                "source_dataset": DATASET_NAME,
                "source_index": idx,
                "task_type": result["task_type"],
                "difficulty": {
                    "min_total_items": self.config.min_total_items,
                    "max_total_items": self.config.max_total_items,
                },
            },
        }

    def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
        """Score ``answer`` against ``entry["answer"]``.

        Returns:
            1.0 for an exact string match or an equal rational value,
            0.9 if the values differ by less than 1e-4,
            0.5 if they differ by less than 1e-2,
            0.0 otherwise, including a missing or unparseable answer.
        """
        if answer is None:
            return 0.0

        given = answer.strip()
        oracle = entry["answer"].strip()
        if given == oracle:
            return 1.0

        try:
            ans_frac = Fraction(given)
            oracle_frac = Fraction(oracle)
            if ans_frac == oracle_frac:
                return 1.0
            diff = abs(float(ans_frac) - float(oracle_frac))
        except (ValueError, ZeroDivisionError, OverflowError):
            # ValueError: not a number; ZeroDivisionError: e.g. "1/0";
            # OverflowError: parses but is too large for float, e.g. "1e400".
            return 0.0

        if diff < 1e-4:
            return 0.9
        if diff < 1e-2:
            return 0.5
        return 0.0
|
|
|
|
|
|
class ConditionalProbabilityCurriculum(BaseCurriculum):
    """Curriculum for the conditional-probability dataset.

    Exposes one difficulty attribute, ``total_items``, tied to the config
    fields ``min_total_items`` and ``max_total_items``.
    """

    def __init__(self):
        """Register the curriculum under its class name with its single attribute."""
        super().__init__(ConditionalProbabilityCurriculum.__name__, ConditionalProbabilityConfig)

        # How the levels map onto the two config fields is decided by
        # RangeAttributeDefinition, which is defined outside this file.
        total_items_attribute = RangeAttributeDefinition(
            name="total_items",
            description="Total items for draw problems",
            levels=[5, 10, 20, 50],
            lower_field_name="min_total_items",
            upper_field_name="max_total_items",
        )
        self._define_attributes(total_items_attribute)
|
|
|
|
|
|
# Register the dataset, its config class and its curriculum under DATASET_NAME.
register_dataset(
    DATASET_NAME,
    ConditionalProbabilityDataset,
    ConditionalProbabilityConfig,
    ConditionalProbabilityCurriculum,
)
|