Refactor and improve code formatting for readability in combinatorics and probability modules

This commit is contained in:
Ritvik19 2026-04-18 19:57:16 +05:30
parent dc0d81c096
commit 0f7b8b7986
5 changed files with 62 additions and 37 deletions

View file

@@ -36,9 +36,22 @@ class CombinatoricsConfig:
task_types: tuple[str, ...] = TASK_TYPES
task_weights: list[float] = field(
default_factory=lambda: [
0.08, 0.06, 0.08, 0.08, 0.06, 0.04,
0.07, 0.07, 0.07, 0.07, 0.07, 0.07,
0.06, 0.06, 0.06, 0.06,
0.08,
0.06,
0.08,
0.08,
0.06,
0.04,
0.07,
0.07,
0.07,
0.07,
0.07,
0.07,
0.06,
0.06,
0.06,
0.06,
]
)
seed: Optional[int] = None
@@ -241,8 +254,7 @@ class CombinatoricsDataset(ProceduralDataset):
n = rng.randint(max(4, self.config.min_n), max(5, self.config.max_n))
answer = n * (n - 3) // 2
question = (
f"How many diagonals does a {n}-sided convex polygon have? "
f"Give your answer as a single integer."
f"How many diagonals does a {n}-sided convex polygon have? " f"Give your answer as a single integer."
)
return {"question": question, "answer": str(answer), "task_type": "geometric_counting"}

View file

@@ -8,11 +8,7 @@ from .conditional_probability import (
ConditionalProbabilityCurriculum,
ConditionalProbabilityDataset,
)
from .probability_problems import (
ProbabilityProblemsConfig,
ProbabilityProblemsCurriculum,
ProbabilityProblemsDataset,
)
from .probability_problems import ProbabilityProblemsConfig, ProbabilityProblemsCurriculum, ProbabilityProblemsDataset
__all__ = [
"CoinFlipDataset",

View file

@@ -29,8 +29,15 @@ class ProbabilityProblemsConfig:
task_types: tuple[str, ...] = TASK_TYPES
task_weights: list[float] = field(
default_factory=lambda: [
0.12, 0.11, 0.12, 0.12,
0.11, 0.10, 0.11, 0.10, 0.11,
0.12,
0.11,
0.12,
0.12,
0.11,
0.10,
0.11,
0.10,
0.11,
]
)
seed: Optional[int] = None
@@ -123,10 +130,7 @@ class ProbabilityProblemsDataset(ProceduralDataset):
for red, blue in bags:
p_red += p_bag * Fraction(red, red + blue)
bag_desc = ". ".join(
f"Bag {i + 1} contains {r} red and {b} blue balls"
for i, (r, b) in enumerate(bags)
)
bag_desc = ". ".join(f"Bag {i + 1} contains {r} red and {b} blue balls" for i, (r, b) in enumerate(bags))
question = (
f"{bag_desc}. "
f"One bag is chosen uniformly at random and a ball is drawn from it. "
@@ -154,10 +158,7 @@ class ProbabilityProblemsDataset(ProceduralDataset):
p_red_given_target = Fraction(red_t, red_t + blue_t)
p_target_given_red = (p_bag * p_red_given_target) / p_red
bag_desc = ". ".join(
f"Bag {i + 1} contains {r} red and {b} blue balls"
for i, (r, b) in enumerate(bags)
)
bag_desc = ". ".join(f"Bag {i + 1} contains {r} red and {b} blue balls" for i, (r, b) in enumerate(bags))
question = (
f"{bag_desc}. "
f"One bag is chosen uniformly at random and a ball is drawn. The ball is red. "
@@ -170,15 +171,19 @@ class ProbabilityProblemsDataset(ProceduralDataset):
def _make_binomial_probability(self, rng: random.Random) -> dict:
p_choices = [
Fraction(1, 6), Fraction(1, 4), Fraction(1, 3),
Fraction(1, 2), Fraction(2, 3), Fraction(3, 4),
Fraction(1, 6),
Fraction(1, 4),
Fraction(1, 3),
Fraction(1, 2),
Fraction(2, 3),
Fraction(3, 4),
]
p = rng.choice(p_choices)
q = 1 - p
n = rng.randint(self.config.min_n, min(self.config.max_n, 8))
r = rng.randint(0, n)
prob = Fraction(math.comb(n, r)) * (p ** r) * (q ** (n - r))
prob = Fraction(math.comb(n, r)) * (p**r) * (q ** (n - r))
question = (
f"A biased coin has a probability of heads equal to {p}. "
f"If it is flipped {n} times, what is the probability of getting exactly {r} heads? "
@@ -188,8 +193,12 @@ class ProbabilityProblemsDataset(ProceduralDataset):
def _make_binomial_stats(self, rng: random.Random) -> dict:
p_choices = [
Fraction(1, 6), Fraction(1, 4), Fraction(1, 3),
Fraction(1, 2), Fraction(2, 3), Fraction(3, 4),
Fraction(1, 6),
Fraction(1, 4),
Fraction(1, 3),
Fraction(1, 2),
Fraction(2, 3),
Fraction(3, 4),
]
p = rng.choice(p_choices)
q = 1 - p
@@ -216,8 +225,11 @@ class ProbabilityProblemsDataset(ProceduralDataset):
def _make_geometric_series(self, rng: random.Random) -> dict:
p_choices = [
Fraction(1, 6), Fraction(1, 5), Fraction(1, 4),
Fraction(1, 3), Fraction(1, 2),
Fraction(1, 6),
Fraction(1, 5),
Fraction(1, 4),
Fraction(1, 3),
Fraction(1, 2),
]
p = rng.choice(p_choices)
q = rng.choice(p_choices)
@@ -354,6 +366,4 @@ class ProbabilityProblemsCurriculum(BaseCurriculum):
)
register_dataset(
DATASET_NAME, ProbabilityProblemsDataset, ProbabilityProblemsConfig, ProbabilityProblemsCurriculum
)
register_dataset(DATASET_NAME, ProbabilityProblemsDataset, ProbabilityProblemsConfig, ProbabilityProblemsCurriculum)

View file

@@ -181,7 +181,9 @@ def test_legendres_formula_known_values():
def test_legendres_formula_manual():
"""Power of 2 in 10! = floor(10/2) + floor(10/4) + floor(10/8) = 5+2+1 = 8."""
config = CombinatoricsConfig(seed=0, size=50, task_types=("legendres_formula",), task_weights=[1.0], min_n=10, max_n=10)
config = CombinatoricsConfig(
seed=0, size=50, task_types=("legendres_formula",), task_weights=[1.0], min_n=10, max_n=10
)
ds = CombinatoricsDataset(config)
for i in range(len(ds)):
item = ds[i]
@@ -202,9 +204,16 @@ def test_integral_solutions_known_values():
def test_all_new_types_score_oracle():
"""Oracle answers should all score 1.0."""
new_types = (
"multinomial", "grid_paths", "constrained_selection", "circular_permutation",
"geometric_counting", "dictionary_rank", "derangement", "group_division",
"legendres_formula", "integral_solutions",
"multinomial",
"grid_paths",
"constrained_selection",
"circular_permutation",
"geometric_counting",
"dictionary_rank",
"derangement",
"group_division",
"legendres_formula",
"integral_solutions",
)
for tt in new_types:
config = CombinatoricsConfig(seed=42, size=10, task_types=(tt,), task_weights=[1.0])

View file

@@ -77,9 +77,7 @@ def test_score_wrong_answer():
def test_score_equivalent_fraction():
config = ProbabilityProblemsConfig(
seed=42, size=10, task_types=("independent_events",), task_weights=[1.0]
)
config = ProbabilityProblemsConfig(seed=42, size=10, task_types=("independent_events",), task_weights=[1.0])
ds = ProbabilityProblemsDataset(config)
item = ds[0]
oracle_frac = Fraction(item["answer"])