Merge upstream/main: greedy baseline fix + additive skill boost

Resolved conflicts — combined best of both: - bot_runner.py: kept our trust-aware candidate building + upstream's tier-avg rates + no task cap - task_complete.py: upstream's additive skill boost (nerfs greedy snowball) + our configurable cap (wc.skill_rate_max instead of hardcoded 10) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-29 17:35:12 +00:00 · 2026-03-09 17:38:53 -07:00 · 2026-03-09 17:38:53 -07:00 · d28ccb1bb2
commit d28ccb1bb2
parent 11f4b89144 a38b9f4135
14 changed files with 50 additions and 144 deletions
--- a/scripts/bot_runner.py
+++ b/scripts/bot_runner.py
@ -49,6 +49,7 @@ from yc_bench.services.seed_world import SeedWorldRequest, seed_world_transactio
 CONFIGS = ["medium", "hard", "nightmare"]
 SEEDS = [1, 2, 3]

+MAX_TASK_CYCLES = None  # No cap — bot plays until horizon end


@dataclass
@ -60,25 +61,31 @@ class CandidateTask:
    is_completable: bool


-def estimate_completion_hours(task_reqs, employee_skills, n_concurrent_tasks=1):
-    """Estimate hours to complete task with all employees assigned."""
-    domain_rates = {}
-    for req in task_reqs:
-        domain = req["domain"]
-        total_rate = Decimal("0")
-        for emp in employee_skills:
-            rate = emp.get(domain, Decimal("0"))
-            total_rate += rate / Decimal(n_concurrent_tasks)
-        domain_rates[domain] = total_rate
+# Tier-average rates: E[uniform(0, max_rate)] = max_rate / 2.
+# The LLM agent only sees tier + salary, not actual per-domain rates.
+_TIER_AVG_RATE = {
+    "junior": Decimal("2.0"),   # uniform(0, 4) => E=2.0
+    "mid": Decimal("3.5"),      # uniform(0, 7) => E=3.5
+    "senior": Decimal("5.0"),   # uniform(0, 10) => E=5.0
+}
+
+
+def estimate_completion_hours(task_reqs, employee_tiers, n_concurrent_tasks=1):
+    """Estimate hours to complete task using tier-average rates (blind to actual skills).
+
+    employee_tiers is a list of tier strings like ["junior", "mid", "senior", ...].
+    Each employee is assumed to contribute their tier's average rate to every domain.
+    """
+    total_rate = sum(_TIER_AVG_RATE[t] for t in employee_tiers)
+    effective_rate = total_rate / Decimal(n_concurrent_tasks)
+
+    if effective_rate <= 0:
+        return None

    max_hours = Decimal("0")
    for req in task_reqs:
-        domain = req["domain"]
        qty = Decimal(str(req["required_qty"]))
-        rate = domain_rates.get(domain, Decimal("0"))
-        if rate <= 0:
-            return None
-        hours = qty / rate
+        hours = qty / effective_rate
        if hours > max_hours:
            max_hours = hours
    return max_hours
@ -90,13 +97,14 @@ def _compute_deadline(accepted_at, max_domain_qty, cfg):
    return add_business_hours(accepted_at, Decimal(str(biz_days)) * Decimal(str(work_hours)))


-def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills):
+def _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers):
    """Build CandidateTask list from the same limited market window the LLM sees.

    Mirrors the LLM's constraints:
    - Only sees `market_browse_default_limit` tasks (default 50), not the full market
    - Respects prestige requirements (per-domain gating)
    - Respects trust requirements (can't accept tasks above current trust level)
+    - Uses tier-average rates (blind to actual per-domain skills)
    """
    from yc_bench.db.models.client import ClientTrust

@ -134,8 +142,6 @@ def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills):
        if accessible:
            break

-    all_skills = [{d: r for d, r in e["skills"].items()} for e in emp_skills]
-
    candidates = []
    for task in market_tasks:
        reqs = db.query(TaskRequirement).filter(
@ -159,7 +165,7 @@ def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills):
        max_domain_qty = max(float(r.required_qty) for r in reqs)
        task_reqs = [{"domain": r.domain, "required_qty": float(r.required_qty)} for r in reqs]

-        completion_hours = estimate_completion_hours(task_reqs, all_skills, n_concurrent_tasks=1)
+        completion_hours = estimate_completion_hours(task_reqs, employee_tiers, n_concurrent_tasks=1)

        is_completable = False
        if completion_hours is not None:
@ -322,17 +328,12 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
                    break
                continue

-            # Get employees and build candidates
+            # Get employees — only tier info (same as LLM agent sees)
            employees = db.query(Employee).filter(Employee.company_id == company_id).all()
-            emp_skills = []
-            for emp in employees:
-                skills = db.query(EmployeeSkillRate).filter(
-                    EmployeeSkillRate.employee_id == emp.id
-                ).all()
-                skill_map = {s.domain: Decimal(s.rate_domain_per_hour) for s in skills}
-                emp_skills.append({"id": emp.id, "skills": skill_map})
+            employee_tiers = [emp.tier for emp in employees]
+            employee_ids = [emp.id for emp in employees]

-            candidates, max_prestige = _build_candidates(db, company_id, sim_state, world_cfg, emp_skills)
+            candidates, max_prestige = _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers)
            completable = [c for c in candidates if c.is_completable]

            context = {
@ -445,10 +446,10 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
                ))

            # Assign ALL employees
-            for e in emp_skills:
+            for eid in employee_ids:
                db.add(TaskAssignment(
                    task_id=best_task.id,
-                    employee_id=e["id"],
+                    employee_id=eid,
                    assigned_at=sim_state.sim_time,
                ))
            db.flush()