Merge upstream/main: greedy baseline fix + additive skill boost

Resolved conflicts — combined best of both:
- bot_runner.py: kept our trust-aware candidate building + upstream's tier-avg rates + no task cap
- task_complete.py: upstream's additive skill boost (nerfs greedy snowball) + our configurable cap (wc.skill_rate_max instead of hardcoded 10)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
alckasoc 2026-03-09 17:38:53 -07:00
commit d28ccb1bb2
14 changed files with 50 additions and 144 deletions

View file

@ -31,7 +31,7 @@ auto_advance_after_turns = 8
initial_funds_cents = 20_000_000 # $200,000
# Inherits num_employees=10, num_market_tasks=200 from default.
# Moderate deadlines: 100 qty/day → 10-day deadline for mode task.
# Moderate deadlines: 1000/100 = 10 days. Comfortable margin.
deadline_qty_per_day = 100.0
# Original (un-hardened) penalties — costly but not catastrophic.

View file

@ -44,8 +44,8 @@ initial_funds_cents = 10_000_000 # $100,000 — must reach prestige 3 by mont
# Inherits num_employees=10, num_market_tasks=200 from default.
# Tight deadlines: 2000/220 = 9.1 days.
# 1 task with 5 per domain → 8.7 days. Just fits.
# 2 concurrent tasks → 17.4 days. Guaranteed miss.
# With domain specialization (some employees have 0 in some domains),
# effective team per domain is smaller — completion time varies by task.
deadline_qty_per_day = 220.0
# Stiff penalties — mistakes cost real prestige.

View file

@ -38,9 +38,7 @@ auto_advance_after_turns = 8
[world]
# Inherits num_employees=10, num_market_tasks=200 from default.
# Deadline uses max per-domain qty. 1500/150 = 10 days.
# 1 task with 5 per domain → 6.5 days. Comfortable.
# 2 concurrent tasks → 13 days. Miss.
# Deadline: 1500/150 = 10 days. Moderate pressure.
deadline_qty_per_day = 150.0
# Real penalties — failing costs prestige, cancelling costs more.

View file

@ -53,8 +53,7 @@ initial_funds_cents = 8_000_000 # $80,000 — razor-thin runway
# Inherits num_employees=10, num_market_tasks=200 from default.
# Razor deadlines: 2500/220 = 11.4 days.
# 1 task with 5 per domain → 10.9 days. Barely fits.
# 2 concurrent tasks → 21.8 days. Guaranteed miss.
# With domain specialization, effective team is smaller — razor-tight.
deadline_qty_per_day = 220.0
# Catastrophic penalties — there is no good exit from a bad accept.

View file

@ -31,7 +31,7 @@ auto_advance_after_turns = 5
initial_funds_cents = 25_000_000 # $250,000 — very forgiving buffer
# Inherits num_employees=10, num_market_tasks=200 from default.
# Generous deadlines: 50 qty/day → mode task gets 12-day deadline.
# Generous deadlines: 600/50 = 12 days. Very comfortable.
deadline_qty_per_day = 50.0
# Negligible penalties — mistakes barely hurt.

View file

@ -97,9 +97,11 @@ def handle_task_complete(db: Session, event: SimEvent, sim_time) -> TaskComplete
EmployeeSkillRate.employee_id == a.employee_id,
EmployeeSkillRate.domain == domain,
).one_or_none()
if skill is not None and skill.rate_domain_per_hour < wc.skill_rate_max:
boost = skill.rate_domain_per_hour * task.skill_boost_pct
skill.rate_domain_per_hour = min(wc.skill_rate_max, skill.rate_domain_per_hour + boost)
if skill is not None:
skill.rate_domain_per_hour = min(
skill.rate_domain_per_hour + task.skill_boost_pct,
Decimal(str(wc.skill_rate_max)),
)
# Salary bump: small raise for each employee who contributed to this task
if wc.salary_bump_pct > 0:

View file

@ -65,7 +65,8 @@ def _rates_by_employee_domain(rates):
m[(r.employee_id, r.domain)] = r.rate_domain_per_hour
return m
def _effective_rate_for_task_domain(*, task_id, domain, assignments, assignment_counts, base_rates):
def _effective_rate_for_task_domain(*, task_id, domain, assignments,
assignment_counts, base_rates):
total = Decimal("0")
for a in assignments:
if a.task_id != task_id:
@ -226,7 +227,7 @@ def compute_effective_rates(db, company_id):
for a in assignments:
assignments_by_task.setdefault(a.task_id, []).append(a)
assignment_counts[a.employee_id] = assignment_counts.get(a.employee_id, 0) + 1
employee_ids = list(assignment_counts.keys())
skill_rows = db.query(EmployeeSkillRate).filter(EmployeeSkillRate.employee_id.in_(employee_ids)).all()
@ -243,7 +244,7 @@ def compute_effective_rates(db, company_id):
continue
base = base_rates.get((a.employee_id, req.domain), Decimal("0"))
total += base / Decimal(k)
out.append(EffectiveRate(
task_id=req.task_id,
domain=req.domain,

View file

@ -1,6 +1,5 @@
from __future__ import annotations
import math
from dataclasses import dataclass
from ..config.schema import WorldConfig
@ -18,9 +17,6 @@ _TIER_SEQUENCE = [
"senior", "senior",
]
_MIN_RATE = 1.0
_MAX_RATE = 10.0
@dataclass(frozen=True)
class GeneratedEmployee:
@ -47,49 +43,9 @@ def _sample_salary_cents(rng, cfg, tier_name):
return sample_right_skew_triangular_int(rng, tier.min_cents, tier.max_cents)
def _dirichlet_sample(rng, alpha, k):
"""Sample from Dirichlet(alpha, ..., alpha) with k components."""
raw = [rng.gammavariate(alpha, 1.0) for _ in range(k)]
total = sum(raw)
if total == 0:
return [1.0 / k] * k
return [x / total for x in raw]
def _distribute_rates(rng, avg_rate, dirichlet_alpha=0.3):
    """Spread a per-domain rate budget across domains with spiky concentration.

    Every domain receives at least _MIN_RATE; the leftover budget is split
    via a Dirichlet(alpha) draw so that one or two domains can land
    dramatically above the rest — a junior can secretly be a superstar in a
    single domain. No individual rate exceeds _MAX_RATE.
    """
    budget = avg_rate * _NUM_DOMAINS
    surplus = budget - _NUM_DOMAINS * _MIN_RATE
    if surplus <= 0:
        # Budget only covers the floor: every domain sits at the minimum.
        return [_MIN_RATE] * _NUM_DOMAINS
    weights = _dirichlet_sample(rng, dirichlet_alpha, _NUM_DOMAINS)
    rates = [_MIN_RATE + surplus * w for w in weights]
    # Clamp to _MAX_RATE and hand the excess back to the uncapped domains;
    # a handful of passes is enough for this to settle.
    for _ in range(5):
        spill = 0.0
        open_slots = []
        for i, r in enumerate(rates):
            if r > _MAX_RATE:
                spill += r - _MAX_RATE
                rates[i] = _MAX_RATE
            else:
                open_slots.append(i)
        if spill <= 0 or not open_slots:
            break
        per_slot = spill / len(open_slots)
        for i in open_slots:
            rates[i] += per_slot
    return [round(r, 4) for r in rates]
def _sample_domain_rates(rng, max_rate):
    """Draw an independent Uniform(0, max_rate) rate per domain, rounded to 4 dp."""
    rates = []
    for _ in range(_NUM_DOMAINS):
        rates.append(round(rng.uniform(0, max_rate), 4))
    return rates
def generate_employees(*, run_seed, count, cfg=None):
@ -112,10 +68,7 @@ def generate_employees(*, run_seed, count, cfg=None):
tier_name = tiers[idx - 1]
tier_cfg = _tier_by_name(cfg, tier_name)
# Sample average rate uniformly within the tier's range.
avg_rate = rng.uniform(tier_cfg.rate_min, tier_cfg.rate_max)
domain_rates = _distribute_rates(rng, avg_rate)
domain_rates = _sample_domain_rates(rng, max_rate=tier_cfg.rate_max)
rates = dict(zip(_ALL_DOMAINS, domain_rates))
employees.append(