Merge upstream/main: greedy baseline fix + additive skill boost

Resolved conflicts — combined best of both:
- bot_runner.py: kept our trust-aware candidate building + upstream's tier-avg rates + no task cap
- task_complete.py: upstream's additive skill boost (nerfs greedy snowball) + our configurable cap (wc.skill_rate_max instead of hardcoded 10)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
alckasoc 2026-03-09 17:38:53 -07:00
commit d28ccb1bb2
14 changed files with 50 additions and 144 deletions

View file

@ -49,6 +49,7 @@ from yc_bench.services.seed_world import SeedWorldRequest, seed_world_transactio
CONFIGS = ["medium", "hard", "nightmare"]
SEEDS = [1, 2, 3]
MAX_TASK_CYCLES = None # No cap — bot plays until horizon end
@dataclass
@ -60,25 +61,31 @@ class CandidateTask:
is_completable: bool
def estimate_completion_hours(task_reqs, employee_skills, n_concurrent_tasks=1):
"""Estimate hours to complete task with all employees assigned."""
domain_rates = {}
for req in task_reqs:
domain = req["domain"]
total_rate = Decimal("0")
for emp in employee_skills:
rate = emp.get(domain, Decimal("0"))
total_rate += rate / Decimal(n_concurrent_tasks)
domain_rates[domain] = total_rate
# Tier-average rates: E[uniform(0, max_rate)] = max_rate / 2.
# The LLM agent only sees tier + salary, not actual per-domain rates.
_TIER_AVG_RATE = {
"junior": Decimal("2.0"), # uniform(0, 4) => E=2.0
"mid": Decimal("3.5"), # uniform(0, 7) => E=3.5
"senior": Decimal("5.0"), # uniform(0, 10) => E=5.0
}
def estimate_completion_hours(task_reqs, employee_tiers, n_concurrent_tasks=1):
"""Estimate hours to complete task using tier-average rates (blind to actual skills).
employee_tiers is a list of tier strings like ["junior", "mid", "senior", ...].
Each employee is assumed to contribute their tier's average rate to every domain.
"""
total_rate = sum(_TIER_AVG_RATE[t] for t in employee_tiers)
effective_rate = total_rate / Decimal(n_concurrent_tasks)
if effective_rate <= 0:
return None
max_hours = Decimal("0")
for req in task_reqs:
domain = req["domain"]
qty = Decimal(str(req["required_qty"]))
rate = domain_rates.get(domain, Decimal("0"))
if rate <= 0:
return None
hours = qty / rate
hours = qty / effective_rate
if hours > max_hours:
max_hours = hours
return max_hours
@ -90,13 +97,14 @@ def _compute_deadline(accepted_at, max_domain_qty, cfg):
return add_business_hours(accepted_at, Decimal(str(biz_days)) * Decimal(str(work_hours)))
def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills):
def _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers):
"""Build CandidateTask list from the same limited market window the LLM sees.
Mirrors the LLM's constraints:
- Only sees `market_browse_default_limit` tasks (default 50), not the full market
- Respects prestige requirements (per-domain gating)
- Respects trust requirements (can't accept tasks above current trust level)
- Uses tier-average rates (blind to actual per-domain skills)
"""
from yc_bench.db.models.client import ClientTrust
@ -134,8 +142,6 @@ def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills):
if accessible:
break
all_skills = [{d: r for d, r in e["skills"].items()} for e in emp_skills]
candidates = []
for task in market_tasks:
reqs = db.query(TaskRequirement).filter(
@ -159,7 +165,7 @@ def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills):
max_domain_qty = max(float(r.required_qty) for r in reqs)
task_reqs = [{"domain": r.domain, "required_qty": float(r.required_qty)} for r in reqs]
completion_hours = estimate_completion_hours(task_reqs, all_skills, n_concurrent_tasks=1)
completion_hours = estimate_completion_hours(task_reqs, employee_tiers, n_concurrent_tasks=1)
is_completable = False
if completion_hours is not None:
@ -322,17 +328,12 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
break
continue
# Get employees and build candidates
# Get employees — only tier info (same as LLM agent sees)
employees = db.query(Employee).filter(Employee.company_id == company_id).all()
emp_skills = []
for emp in employees:
skills = db.query(EmployeeSkillRate).filter(
EmployeeSkillRate.employee_id == emp.id
).all()
skill_map = {s.domain: Decimal(s.rate_domain_per_hour) for s in skills}
emp_skills.append({"id": emp.id, "skills": skill_map})
employee_tiers = [emp.tier for emp in employees]
employee_ids = [emp.id for emp in employees]
candidates, max_prestige = _build_candidates(db, company_id, sim_state, world_cfg, emp_skills)
candidates, max_prestige = _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers)
completable = [c for c in candidates if c.is_completable]
context = {
@ -445,10 +446,10 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
))
# Assign ALL employees
for e in emp_skills:
for eid in employee_ids:
db.add(TaskAssignment(
task_id=best_task.id,
employee_id=e["id"],
employee_id=eid,
assigned_at=sim_state.sim_time,
))
db.flush()