mirror of
https://github.com/collinear-ai/yc-bench.git
synced 2026-05-01 17:45:20 +00:00
Merge upstream/main: greedy baseline fix + additive skill boost
Resolved conflicts — combined the best of both:
- bot_runner.py: kept our trust-aware candidate building + upstream's tier-avg rates + no task cap
- task_complete.py: upstream's additive skill boost (nerfs greedy snowball) + our configurable cap (wc.skill_rate_max instead of hardcoded 10)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
d28ccb1bb2
14 changed files with 50 additions and 144 deletions
|
|
@ -31,7 +31,7 @@ auto_advance_after_turns = 8
|
|||
initial_funds_cents = 20_000_000 # $200,000
|
||||
# Inherits num_employees=10, num_market_tasks=200 from default.
|
||||
|
||||
# Moderate deadlines: 100 qty/day → 10-day deadline for mode task.
|
||||
# Moderate deadlines: 1000/100 = 10 days. Comfortable margin.
|
||||
deadline_qty_per_day = 100.0
|
||||
|
||||
# Original (un-hardened) penalties — costly but not catastrophic.
|
||||
|
|
|
|||
|
|
@ -44,8 +44,8 @@ initial_funds_cents = 10_000_000 # $100,000 — must reach prestige 3 by mont
|
|||
# Inherits num_employees=10, num_market_tasks=200 from default.
|
||||
|
||||
# Tight deadlines: 2000/220 = 9.1 days.
|
||||
# 1 task with 5 per domain → 8.7 days. Just fits.
|
||||
# 2 concurrent tasks → 17.4 days. Guaranteed miss.
|
||||
# With domain specialization (some employees have 0 in some domains),
|
||||
# effective team per domain is smaller — completion time varies by task.
|
||||
deadline_qty_per_day = 220.0
|
||||
|
||||
# Stiff penalties — mistakes cost real prestige.
|
||||
|
|
|
|||
|
|
@ -38,9 +38,7 @@ auto_advance_after_turns = 8
|
|||
[world]
|
||||
# Inherits num_employees=10, num_market_tasks=200 from default.
|
||||
|
||||
# Deadline uses max per-domain qty. 1500/150 = 10 days.
|
||||
# 1 task with 5 per domain → 6.5 days. Comfortable.
|
||||
# 2 concurrent tasks → 13 days. Miss.
|
||||
# Deadline: 1500/150 = 10 days. Moderate pressure.
|
||||
deadline_qty_per_day = 150.0
|
||||
|
||||
# Real penalties — failing costs prestige, cancelling costs more.
|
||||
|
|
|
|||
|
|
@ -53,8 +53,7 @@ initial_funds_cents = 8_000_000 # $80,000 — razor-thin runway
|
|||
# Inherits num_employees=10, num_market_tasks=200 from default.
|
||||
|
||||
# Razor deadlines: 2500/220 = 11.4 days.
|
||||
# 1 task with 5 per domain → 10.9 days. Barely fits.
|
||||
# 2 concurrent tasks → 21.8 days. Guaranteed miss.
|
||||
# With domain specialization, effective team is smaller — razor-tight.
|
||||
deadline_qty_per_day = 220.0
|
||||
|
||||
# Catastrophic penalties — there is no good exit from a bad accept.
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ auto_advance_after_turns = 5
|
|||
initial_funds_cents = 25_000_000 # $250,000 — very forgiving buffer
|
||||
# Inherits num_employees=10, num_market_tasks=200 from default.
|
||||
|
||||
# Generous deadlines: 50 qty/day → mode task gets 12-day deadline.
|
||||
# Generous deadlines: 600/50 = 12 days. Very comfortable.
|
||||
deadline_qty_per_day = 50.0
|
||||
|
||||
# Negligible penalties — mistakes barely hurt.
|
||||
|
|
|
|||
|
|
@ -97,9 +97,11 @@ def handle_task_complete(db: Session, event: SimEvent, sim_time) -> TaskComplete
|
|||
EmployeeSkillRate.employee_id == a.employee_id,
|
||||
EmployeeSkillRate.domain == domain,
|
||||
).one_or_none()
|
||||
if skill is not None and skill.rate_domain_per_hour < wc.skill_rate_max:
|
||||
boost = skill.rate_domain_per_hour * task.skill_boost_pct
|
||||
skill.rate_domain_per_hour = min(wc.skill_rate_max, skill.rate_domain_per_hour + boost)
|
||||
if skill is not None:
|
||||
skill.rate_domain_per_hour = min(
|
||||
skill.rate_domain_per_hour + task.skill_boost_pct,
|
||||
Decimal(str(wc.skill_rate_max)),
|
||||
)
|
||||
|
||||
# Salary bump: small raise for each employee who contributed to this task
|
||||
if wc.salary_bump_pct > 0:
|
||||
|
|
|
|||
|
|
@ -65,7 +65,8 @@ def _rates_by_employee_domain(rates):
|
|||
m[(r.employee_id, r.domain)] = r.rate_domain_per_hour
|
||||
return m
|
||||
|
||||
def _effective_rate_for_task_domain(*, task_id, domain, assignments, assignment_counts, base_rates):
|
||||
def _effective_rate_for_task_domain(*, task_id, domain, assignments,
|
||||
assignment_counts, base_rates):
|
||||
total = Decimal("0")
|
||||
for a in assignments:
|
||||
if a.task_id != task_id:
|
||||
|
|
@ -226,7 +227,7 @@ def compute_effective_rates(db, company_id):
|
|||
for a in assignments:
|
||||
assignments_by_task.setdefault(a.task_id, []).append(a)
|
||||
assignment_counts[a.employee_id] = assignment_counts.get(a.employee_id, 0) + 1
|
||||
|
||||
|
||||
employee_ids = list(assignment_counts.keys())
|
||||
skill_rows = db.query(EmployeeSkillRate).filter(EmployeeSkillRate.employee_id.in_(employee_ids)).all()
|
||||
|
||||
|
|
@ -243,7 +244,7 @@ def compute_effective_rates(db, company_id):
|
|||
continue
|
||||
base = base_rates.get((a.employee_id, req.domain), Decimal("0"))
|
||||
total += base / Decimal(k)
|
||||
|
||||
|
||||
out.append(EffectiveRate(
|
||||
task_id=req.task_id,
|
||||
domain=req.domain,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..config.schema import WorldConfig
|
||||
|
|
@ -18,9 +17,6 @@ _TIER_SEQUENCE = [
|
|||
"senior", "senior",
|
||||
]
|
||||
|
||||
_MIN_RATE = 1.0
|
||||
_MAX_RATE = 10.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class GeneratedEmployee:
|
||||
|
|
@ -47,49 +43,9 @@ def _sample_salary_cents(rng, cfg, tier_name):
|
|||
return sample_right_skew_triangular_int(rng, tier.min_cents, tier.max_cents)
|
||||
|
||||
|
||||
def _dirichlet_sample(rng, alpha, k):
|
||||
"""Sample from Dirichlet(alpha, ..., alpha) with k components."""
|
||||
raw = [rng.gammavariate(alpha, 1.0) for _ in range(k)]
|
||||
total = sum(raw)
|
||||
if total == 0:
|
||||
return [1.0 / k] * k
|
||||
return [x / total for x in raw]
|
||||
|
||||
|
||||
def _distribute_rates(rng, avg_rate, dirichlet_alpha=0.3):
    """Distribute a rate budget across domains with spiky concentration.

    The total budget is ``avg_rate * _NUM_DOMAINS``. Each domain first gets
    ``_MIN_RATE``; the remaining budget is split by a symmetric
    Dirichlet(``dirichlet_alpha``) draw, so one or two domains can end up
    dramatically higher than the rest. No returned rate exceeds
    ``_MAX_RATE``: overflow is moved onto domains that still have headroom,
    and budget that cannot be placed anywhere is dropped.

    Args:
        rng: Random source forwarded to ``_dirichlet_sample``.
        avg_rate: Target mean rate per domain before capping.
        dirichlet_alpha: Concentration of the Dirichlet split (default 0.3).

    Returns:
        A list of ``_NUM_DOMAINS`` rates. When the budget does not exceed
        the per-domain floor, every entry is ``_MIN_RATE`` (unrounded);
        otherwise entries are rounded to 4 decimals.
    """
    total_budget = avg_rate * _NUM_DOMAINS
    extra = total_budget - _NUM_DOMAINS * _MIN_RATE

    if extra <= 0:
        return [_MIN_RATE] * _NUM_DOMAINS

    proportions = _dirichlet_sample(rng, dirichlet_alpha, _NUM_DOMAINS)
    rates = [_MIN_RATE + extra * p for p in proportions]

    # Cap at _MAX_RATE and push the excess onto domains strictly below the
    # cap. A domain pinned at the cap never receives overflow again, so every
    # pass that still has overflow pins at least one more domain and
    # _NUM_DOMAINS passes are always enough. (Previously, pinned domains were
    # counted as "uncapped", received overflow again, and a fixed 5 passes
    # could end with rates above the cap.)
    for _ in range(_NUM_DOMAINS):
        overflow = 0.0
        for i, rate in enumerate(rates):
            if rate > _MAX_RATE:
                overflow += rate - _MAX_RATE
                rates[i] = _MAX_RATE
        headroom = [i for i, rate in enumerate(rates) if rate < _MAX_RATE]
        if overflow <= 0 or not headroom:
            break
        share = overflow / len(headroom)
        for i in headroom:
            rates[i] += share

    # Final clamp guards the documented cap against float drift.
    return [round(min(r, _MAX_RATE), 4) for r in rates]
|
||||
def _sample_domain_rates(rng, max_rate):
    """Draw one independent rate per domain via ``rng.uniform(0, max_rate)``.

    Returns a list of ``_NUM_DOMAINS`` values, each rounded to 4 decimals.
    """
    sampled = []
    for _ in range(_NUM_DOMAINS):
        draw = rng.uniform(0, max_rate)
        sampled.append(round(draw, 4))
    return sampled
|
||||
|
||||
|
||||
def generate_employees(*, run_seed, count, cfg=None):
|
||||
|
|
@ -112,10 +68,7 @@ def generate_employees(*, run_seed, count, cfg=None):
|
|||
tier_name = tiers[idx - 1]
|
||||
tier_cfg = _tier_by_name(cfg, tier_name)
|
||||
|
||||
# Sample average rate uniformly within the tier's range.
|
||||
avg_rate = rng.uniform(tier_cfg.rate_min, tier_cfg.rate_max)
|
||||
|
||||
domain_rates = _distribute_rates(rng, avg_rate)
|
||||
domain_rates = _sample_domain_rates(rng, max_rate=tier_cfg.rate_max)
|
||||
rates = dict(zip(_ALL_DOMAINS, domain_rates))
|
||||
|
||||
employees.append(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue