Merge upstream/main: greedy baseline fix + additive skill boost

Resolved conflicts — combined best of both:

- bot_runner.py: kept our trust-aware candidate building + upstream's tier-avg rates + no task cap
- task_complete.py: upstream's additive skill boost (nerfs greedy snowball) + our configurable cap (wc.skill_rate_max instead of hardcoded 10)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-01 17:45:20 +00:00 · 2026-03-09 17:38:53 -07:00 · 2026-03-09 17:38:53 -07:00 · d28ccb1bb2
commit d28ccb1bb2
parent 11f4b89144 a38b9f4135
14 changed files with 50 additions and 144 deletions
--- a/src/yc_bench/config/presets/easy.toml
+++ b/src/yc_bench/config/presets/easy.toml
@ -31,7 +31,7 @@ auto_advance_after_turns = 8
 initial_funds_cents = 20_000_000    # $200,000
 # Inherits num_employees=10, num_market_tasks=200 from default.

-# Moderate deadlines: 100 qty/day → 10-day deadline for mode task.
+# Moderate deadlines: 1000/100 = 10 days. Comfortable margin.
 deadline_qty_per_day = 100.0

 # Original (un-hardened) penalties — costly but not catastrophic.
--- a/src/yc_bench/config/presets/hard.toml
+++ b/src/yc_bench/config/presets/hard.toml
@ -44,8 +44,8 @@ initial_funds_cents = 10_000_000    # $100,000 — must reach prestige 3 by mont
 # Inherits num_employees=10, num_market_tasks=200 from default.

 # Tight deadlines: 2000/220 = 9.1 days.
-# 1 task with 5 per domain → 8.7 days. Just fits.
-# 2 concurrent tasks → 17.4 days. Guaranteed miss.
+# With domain specialization (some employees have 0 in some domains),
+# effective team per domain is smaller — completion time varies by task.
 deadline_qty_per_day = 220.0

 # Stiff penalties — mistakes cost real prestige.
--- a/src/yc_bench/config/presets/medium.toml
+++ b/src/yc_bench/config/presets/medium.toml
@ -38,9 +38,7 @@ auto_advance_after_turns = 8
 [world]
 # Inherits num_employees=10, num_market_tasks=200 from default.

-# Deadline uses max per-domain qty. 1500/150 = 10 days.
-# 1 task with 5 per domain → 6.5 days. Comfortable.
-# 2 concurrent tasks → 13 days. Miss.
+# Deadline: 1500/150 = 10 days. Moderate pressure.
 deadline_qty_per_day = 150.0

 # Real penalties — failing costs prestige, cancelling costs more.
--- a/src/yc_bench/config/presets/nightmare.toml
+++ b/src/yc_bench/config/presets/nightmare.toml
@ -53,8 +53,7 @@ initial_funds_cents = 8_000_000     # $80,000 — razor-thin runway
 # Inherits num_employees=10, num_market_tasks=200 from default.

 # Razor deadlines: 2500/220 = 11.4 days.
-# 1 task with 5 per domain → 10.9 days. Barely fits.
-# 2 concurrent tasks → 21.8 days. Guaranteed miss.
+# With domain specialization, effective team is smaller — razor-tight.
 deadline_qty_per_day = 220.0

 # Catastrophic penalties — there is no good exit from a bad accept.
--- a/src/yc_bench/config/presets/tutorial.toml
+++ b/src/yc_bench/config/presets/tutorial.toml
@ -31,7 +31,7 @@ auto_advance_after_turns = 5
 initial_funds_cents = 25_000_000    # $250,000 — very forgiving buffer
 # Inherits num_employees=10, num_market_tasks=200 from default.

-# Generous deadlines: 50 qty/day → mode task gets 12-day deadline.
+# Generous deadlines: 600/50 = 12 days. Very comfortable.
 deadline_qty_per_day = 50.0

 # Negligible penalties — mistakes barely hurt.
--- a/src/yc_bench/core/handlers/task_complete.py
+++ b/src/yc_bench/core/handlers/task_complete.py
@ -97,9 +97,11 @@ def handle_task_complete(db: Session, event: SimEvent, sim_time) -> TaskComplete
                        EmployeeSkillRate.employee_id == a.employee_id,
                        EmployeeSkillRate.domain == domain,
                    ).one_or_none()
-                    if skill is not None and skill.rate_domain_per_hour < wc.skill_rate_max:
-                        boost = skill.rate_domain_per_hour * task.skill_boost_pct
-                        skill.rate_domain_per_hour = min(wc.skill_rate_max, skill.rate_domain_per_hour + boost)
+                    if skill is not None:
+                        skill.rate_domain_per_hour = min(
+                            skill.rate_domain_per_hour + task.skill_boost_pct,
+                            Decimal(str(wc.skill_rate_max)),
+                        )

        # Salary bump: small raise for each employee who contributed to this task
        if wc.salary_bump_pct > 0:
--- a/src/yc_bench/core/progress.py
+++ b/src/yc_bench/core/progress.py
@ -65,7 +65,8 @@ def _rates_by_employee_domain(rates):
        m[(r.employee_id, r.domain)] = r.rate_domain_per_hour
    return m

-def _effective_rate_for_task_domain(*, task_id, domain, assignments, assignment_counts, base_rates):
+def _effective_rate_for_task_domain(*, task_id, domain, assignments,
+                                    assignment_counts, base_rates):
    total = Decimal("0")
    for a in assignments:
        if a.task_id != task_id:
@ -226,7 +227,7 @@ def compute_effective_rates(db, company_id):
    for a in assignments:
        assignments_by_task.setdefault(a.task_id, []).append(a)
        assignment_counts[a.employee_id] = assignment_counts.get(a.employee_id, 0) + 1
-    
+
    employee_ids = list(assignment_counts.keys())
    skill_rows = db.query(EmployeeSkillRate).filter(EmployeeSkillRate.employee_id.in_(employee_ids)).all()

@ -243,7 +244,7 @@ def compute_effective_rates(db, company_id):
                continue
            base = base_rates.get((a.employee_id, req.domain), Decimal("0"))
            total += base / Decimal(k)
-        
+
        out.append(EffectiveRate(
            task_id=req.task_id,
            domain=req.domain,
--- a/src/yc_bench/services/generate_employees.py
+++ b/src/yc_bench/services/generate_employees.py
@ -1,6 +1,5 @@
 from __future__ import annotations

-import math
 from dataclasses import dataclass

 from ..config.schema import WorldConfig
@ -18,9 +17,6 @@ _TIER_SEQUENCE = [
    "senior", "senior",
 ]

-_MIN_RATE = 1.0
-_MAX_RATE = 10.0
-

@dataclass(frozen=True)
 class GeneratedEmployee:
@ -47,49 +43,9 @@ def _sample_salary_cents(rng, cfg, tier_name):
    return sample_right_skew_triangular_int(rng, tier.min_cents, tier.max_cents)


-def _dirichlet_sample(rng, alpha, k):
-    """Sample from Dirichlet(alpha, ..., alpha) with k components."""
-    raw = [rng.gammavariate(alpha, 1.0) for _ in range(k)]
-    total = sum(raw)
-    if total == 0:
-        return [1.0 / k] * k
-    return [x / total for x in raw]
-
-
-def _distribute_rates(rng, avg_rate, dirichlet_alpha=0.3):
-    """Distribute a rate budget across domains with spiky concentration.
-
-    Each domain gets at least _MIN_RATE.  The extra budget is split via
-    Dirichlet(alpha) so that one or two domains can be dramatically higher
-    than the rest — a junior can secretly be a superstar in one domain.
-    Individual rates are capped at _MAX_RATE.
-    """
-    total_budget = avg_rate * _NUM_DOMAINS
-    extra = total_budget - _NUM_DOMAINS * _MIN_RATE
-
-    if extra <= 0:
-        return [_MIN_RATE] * _NUM_DOMAINS
-
-    proportions = _dirichlet_sample(rng, dirichlet_alpha, _NUM_DOMAINS)
-    rates = [_MIN_RATE + extra * p for p in proportions]
-
-    # Cap at _MAX_RATE and redistribute excess iteratively.
-    for _ in range(5):
-        overflow = 0.0
-        uncapped = []
-        for i in range(_NUM_DOMAINS):
-            if rates[i] > _MAX_RATE:
-                overflow += rates[i] - _MAX_RATE
-                rates[i] = _MAX_RATE
-            else:
-                uncapped.append(i)
-        if overflow <= 0 or not uncapped:
-            break
-        share = overflow / len(uncapped)
-        for i in uncapped:
-            rates[i] += share
-
-    return [round(r, 4) for r in rates]
+def _sample_domain_rates(rng, max_rate):
+    """Sample each domain's rate independently from 0 to max_rate."""
+    return [round(rng.uniform(0, max_rate), 4) for _ in range(_NUM_DOMAINS)]


 def generate_employees(*, run_seed, count, cfg=None):
@ -112,10 +68,7 @@ def generate_employees(*, run_seed, count, cfg=None):
        tier_name = tiers[idx - 1]
        tier_cfg = _tier_by_name(cfg, tier_name)

-        # Sample average rate uniformly within the tier's range.
-        avg_rate = rng.uniform(tier_cfg.rate_min, tier_cfg.rate_max)
-
-        domain_rates = _distribute_rates(rng, avg_rate)
+        domain_rates = _sample_domain_rates(rng, max_rate=tier_cfg.rate_max)
        rates = dict(zip(_ALL_DOMAINS, domain_rates))

        employees.append(