mirror of
https://github.com/collinear-ai/yc-bench.git
synced 2026-04-29 17:35:12 +00:00
Merge upstream/main: greedy baseline fix + additive skill boost
Resolved conflicts — combined best of both: - bot_runner.py: kept our trust-aware candidate building + upstream's tier-avg rates + no task cap - task_complete.py: upstream's additive skill boost (nerfs greedy snowball) + our configurable cap (wc.skill_rate_max instead of hardcoded 10) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
d28ccb1bb2
14 changed files with 50 additions and 144 deletions
|
|
@ -49,6 +49,7 @@ from yc_bench.services.seed_world import SeedWorldRequest, seed_world_transactio
|
|||
CONFIGS = ["medium", "hard", "nightmare"]
|
||||
SEEDS = [1, 2, 3]
|
||||
|
||||
MAX_TASK_CYCLES = None # No cap — bot plays until horizon end
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -60,25 +61,31 @@ class CandidateTask:
|
|||
is_completable: bool
|
||||
|
||||
|
||||
def estimate_completion_hours(task_reqs, employee_skills, n_concurrent_tasks=1):
|
||||
"""Estimate hours to complete task with all employees assigned."""
|
||||
domain_rates = {}
|
||||
for req in task_reqs:
|
||||
domain = req["domain"]
|
||||
total_rate = Decimal("0")
|
||||
for emp in employee_skills:
|
||||
rate = emp.get(domain, Decimal("0"))
|
||||
total_rate += rate / Decimal(n_concurrent_tasks)
|
||||
domain_rates[domain] = total_rate
|
||||
# Tier-average rates: E[uniform(0, max_rate)] = max_rate / 2.
|
||||
# The LLM agent only sees tier + salary, not actual per-domain rates.
|
||||
_TIER_AVG_RATE = {
|
||||
"junior": Decimal("2.0"), # uniform(0, 4) => E=2.0
|
||||
"mid": Decimal("3.5"), # uniform(0, 7) => E=3.5
|
||||
"senior": Decimal("5.0"), # uniform(0, 10) => E=5.0
|
||||
}
|
||||
|
||||
|
||||
def estimate_completion_hours(task_reqs, employee_tiers, n_concurrent_tasks=1):
|
||||
"""Estimate hours to complete task using tier-average rates (blind to actual skills).
|
||||
|
||||
employee_tiers is a list of tier strings like ["junior", "mid", "senior", ...].
|
||||
Each employee is assumed to contribute their tier's average rate to every domain.
|
||||
"""
|
||||
total_rate = sum(_TIER_AVG_RATE[t] for t in employee_tiers)
|
||||
effective_rate = total_rate / Decimal(n_concurrent_tasks)
|
||||
|
||||
if effective_rate <= 0:
|
||||
return None
|
||||
|
||||
max_hours = Decimal("0")
|
||||
for req in task_reqs:
|
||||
domain = req["domain"]
|
||||
qty = Decimal(str(req["required_qty"]))
|
||||
rate = domain_rates.get(domain, Decimal("0"))
|
||||
if rate <= 0:
|
||||
return None
|
||||
hours = qty / rate
|
||||
hours = qty / effective_rate
|
||||
if hours > max_hours:
|
||||
max_hours = hours
|
||||
return max_hours
|
||||
|
|
@ -90,13 +97,14 @@ def _compute_deadline(accepted_at, max_domain_qty, cfg):
|
|||
return add_business_hours(accepted_at, Decimal(str(biz_days)) * Decimal(str(work_hours)))
|
||||
|
||||
|
||||
def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills):
|
||||
def _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers):
|
||||
"""Build CandidateTask list from the same limited market window the LLM sees.
|
||||
|
||||
Mirrors the LLM's constraints:
|
||||
- Only sees `market_browse_default_limit` tasks (default 50), not the full market
|
||||
- Respects prestige requirements (per-domain gating)
|
||||
- Respects trust requirements (can't accept tasks above current trust level)
|
||||
- Uses tier-average rates (blind to actual per-domain skills)
|
||||
"""
|
||||
from yc_bench.db.models.client import ClientTrust
|
||||
|
||||
|
|
@ -134,8 +142,6 @@ def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills):
|
|||
if accessible:
|
||||
break
|
||||
|
||||
all_skills = [{d: r for d, r in e["skills"].items()} for e in emp_skills]
|
||||
|
||||
candidates = []
|
||||
for task in market_tasks:
|
||||
reqs = db.query(TaskRequirement).filter(
|
||||
|
|
@ -159,7 +165,7 @@ def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills):
|
|||
max_domain_qty = max(float(r.required_qty) for r in reqs)
|
||||
task_reqs = [{"domain": r.domain, "required_qty": float(r.required_qty)} for r in reqs]
|
||||
|
||||
completion_hours = estimate_completion_hours(task_reqs, all_skills, n_concurrent_tasks=1)
|
||||
completion_hours = estimate_completion_hours(task_reqs, employee_tiers, n_concurrent_tasks=1)
|
||||
|
||||
is_completable = False
|
||||
if completion_hours is not None:
|
||||
|
|
@ -322,17 +328,12 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
|
|||
break
|
||||
continue
|
||||
|
||||
# Get employees and build candidates
|
||||
# Get employees — only tier info (same as LLM agent sees)
|
||||
employees = db.query(Employee).filter(Employee.company_id == company_id).all()
|
||||
emp_skills = []
|
||||
for emp in employees:
|
||||
skills = db.query(EmployeeSkillRate).filter(
|
||||
EmployeeSkillRate.employee_id == emp.id
|
||||
).all()
|
||||
skill_map = {s.domain: Decimal(s.rate_domain_per_hour) for s in skills}
|
||||
emp_skills.append({"id": emp.id, "skills": skill_map})
|
||||
employee_tiers = [emp.tier for emp in employees]
|
||||
employee_ids = [emp.id for emp in employees]
|
||||
|
||||
candidates, max_prestige = _build_candidates(db, company_id, sim_state, world_cfg, emp_skills)
|
||||
candidates, max_prestige = _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers)
|
||||
completable = [c for c in candidates if c.is_completable]
|
||||
|
||||
context = {
|
||||
|
|
@ -445,10 +446,10 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
|
|||
))
|
||||
|
||||
# Assign ALL employees
|
||||
for e in emp_skills:
|
||||
for eid in employee_ids:
|
||||
db.add(TaskAssignment(
|
||||
task_id=best_task.id,
|
||||
employee_id=e["id"],
|
||||
employee_id=eid,
|
||||
assigned_at=sim_state.sim_time,
|
||||
))
|
||||
db.flush()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue