diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 1585613..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/plots/hard_1_greedy_bot_funds.png b/plots/hard_1_greedy_bot_funds.png new file mode 100644 index 0000000..19d7a1d Binary files /dev/null and b/plots/hard_1_greedy_bot_funds.png differ diff --git a/plots/hard_blind_greedy_3seeds.png b/plots/hard_blind_greedy_3seeds.png new file mode 100644 index 0000000..d247ce9 Binary files /dev/null and b/plots/hard_blind_greedy_3seeds.png differ diff --git a/plots/hard_greedy_bot_3seeds.png b/plots/hard_greedy_bot_3seeds.png new file mode 100644 index 0000000..a2de73f Binary files /dev/null and b/plots/hard_greedy_bot_3seeds.png differ diff --git a/scripts/bot_runner.py b/scripts/bot_runner.py index 0c04b95..eb4cdcb 100644 --- a/scripts/bot_runner.py +++ b/scripts/bot_runner.py @@ -49,6 +49,7 @@ from yc_bench.services.seed_world import SeedWorldRequest, seed_world_transactio CONFIGS = ["medium", "hard", "nightmare"] SEEDS = [1, 2, 3] +MAX_TASK_CYCLES = None # No cap — bot plays until horizon end @dataclass @@ -60,25 +61,31 @@ class CandidateTask: is_completable: bool -def estimate_completion_hours(task_reqs, employee_skills, n_concurrent_tasks=1): - """Estimate hours to complete task with all employees assigned.""" - domain_rates = {} - for req in task_reqs: - domain = req["domain"] - total_rate = Decimal("0") - for emp in employee_skills: - rate = emp.get(domain, Decimal("0")) - total_rate += rate / Decimal(n_concurrent_tasks) - domain_rates[domain] = total_rate +# Tier-average rates: E[uniform(0, max_rate)] = max_rate / 2. +# The LLM agent only sees tier + salary, not actual per-domain rates. +_TIER_AVG_RATE = { + "junior": Decimal("2.0"), # uniform(0, 4) => E=2.0 + "mid": Decimal("3.5"), # uniform(0, 7) => E=3.5 + "senior": Decimal("5.0"), # uniform(0, 10) => E=5.0 +} + + +def estimate_completion_hours(task_reqs, employee_tiers, n_concurrent_tasks=1): + """Estimate hours to complete task using tier-average rates (blind to actual skills). + + employee_tiers is a list of tier strings like ["junior", "mid", "senior", ...]. + Each employee is assumed to contribute their tier's average rate to every domain. + """ + total_rate = sum(_TIER_AVG_RATE[t] for t in employee_tiers) + effective_rate = total_rate / Decimal(n_concurrent_tasks) + + if effective_rate <= 0: + return None max_hours = Decimal("0") for req in task_reqs: - domain = req["domain"] qty = Decimal(str(req["required_qty"])) - rate = domain_rates.get(domain, Decimal("0")) - if rate <= 0: - return None - hours = qty / rate + hours = qty / effective_rate if hours > max_hours: max_hours = hours return max_hours @@ -90,13 +97,14 @@ def _compute_deadline(accepted_at, max_domain_qty, cfg): return add_business_hours(accepted_at, Decimal(str(biz_days)) * Decimal(str(work_hours))) -def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills): +def _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers): """Build CandidateTask list from the same limited market window the LLM sees. Mirrors the LLM's constraints: - Only sees `market_browse_default_limit` tasks (default 50), not the full market - Respects prestige requirements (per-domain gating) - Respects trust requirements (can't accept tasks above current trust level) + - Uses tier-average rates (blind to actual per-domain skills) """ from yc_bench.db.models.client import ClientTrust @@ -134,8 +142,6 @@ def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills): if accessible: break - all_skills = [{d: r for d, r in e["skills"].items()} for e in emp_skills] - candidates = [] for task in market_tasks: reqs = db.query(TaskRequirement).filter( @@ -159,7 +165,7 @@ def _build_candidates(db, company_id, sim_state, world_cfg, emp_skills): max_domain_qty = max(float(r.required_qty) for r in reqs) task_reqs = [{"domain": r.domain, "required_qty": float(r.required_qty)} for r in reqs] - completion_hours = estimate_completion_hours(task_reqs, all_skills, n_concurrent_tasks=1) + completion_hours = estimate_completion_hours(task_reqs, employee_tiers, n_concurrent_tasks=1) is_completable = False if completion_hours is not None: @@ -322,17 +328,12 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn) break continue - # Get employees and build candidates + # Get employees — only tier info (same as LLM agent sees) employees = db.query(Employee).filter(Employee.company_id == company_id).all() - emp_skills = [] - for emp in employees: - skills = db.query(EmployeeSkillRate).filter( - EmployeeSkillRate.employee_id == emp.id - ).all() - skill_map = {s.domain: Decimal(s.rate_domain_per_hour) for s in skills} - emp_skills.append({"id": emp.id, "skills": skill_map}) + employee_tiers = [emp.tier for emp in employees] + employee_ids = [emp.id for emp in employees] - candidates, max_prestige = _build_candidates(db, company_id, sim_state, world_cfg, emp_skills) + candidates, max_prestige = _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers) completable = [c for c in candidates if c.is_completable] context = { @@ -445,10 +446,10 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn) )) # Assign ALL employees - for e in emp_skills: + for eid in employee_ids: db.add(TaskAssignment( task_id=best_task.id, - employee_id=e["id"], + employee_id=eid, assigned_at=sim_state.sim_time, )) db.flush() diff --git a/scripts/greedy_bot.py b/scripts/greedy_bot.py deleted file mode 100644 index cff343e..0000000 --- a/scripts/greedy_bot.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Greedy bot shim — delegates to bot_runner.py. - -Usage: - uv run python scripts/greedy_bot.py -""" -from __future__ import annotations - -import sys -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) -sys.path.insert(0, str(Path(__file__).parent)) - -from bot_runner import CONFIGS, SEEDS, STRATEGIES, run_bot - - -def main(): - slug, strategy_fn = STRATEGIES["greedy"] - print("Running greedy bot across all configs and seeds...\n") - results = [] - - for config_name in CONFIGS: - for seed in SEEDS: - print(f" {config_name} seed={seed} ...", end=" ", flush=True) - r = run_bot(config_name, seed, slug, strategy_fn) - results.append(r) - - if r["bankrupt"]: - tag = "BANKRUPT" - elif r["final_balance_cents"] >= 1_000_000_00: - tag = f"${r['final_balance_cents']/100:,.0f}" - else: - tag = f"${r['final_balance_cents']/100:,.0f}" - - print(f"{tag} | {r['tasks_completed']} OK, {r['tasks_failed']} fail | prestige {r['max_prestige']:.1f} | {r['turns']} turns") - - print(f"\n{'Config':<12} {'Seed':<5} {'Final Balance':>14} {'OK':>4} {'Fail':>5} {'Prestige':>9}") - print("-" * 55) - for r in results: - fb = "BANKRUPT" if r["bankrupt"] else f"${r['final_balance_cents']/100:,.0f}" - print(f"{r['config']:<12} {r['seed']:<5} {fb:>14} {r['tasks_completed']:>4} {r['tasks_failed']:>5} {r['max_prestige']:>8.1f}") - - bankrupt_count = sum(1 for r in results if r["bankrupt"]) - print(f"\nBankruptcies: {bankrupt_count}/{len(results)}") - - -if __name__ == "__main__": - main() diff --git a/src/yc_bench/config/presets/easy.toml b/src/yc_bench/config/presets/easy.toml index 7345e09..8457754 100644 --- a/src/yc_bench/config/presets/easy.toml +++ b/src/yc_bench/config/presets/easy.toml @@ -31,7 +31,7 @@ auto_advance_after_turns = 8 initial_funds_cents = 20_000_000 # $200,000 # Inherits num_employees=10, num_market_tasks=200 from default. -# Moderate deadlines: 100 qty/day → 10-day deadline for mode task. +# Moderate deadlines: 1000/100 = 10 days. Comfortable margin. deadline_qty_per_day = 100.0 # Original (un-hardened) penalties — costly but not catastrophic. diff --git a/src/yc_bench/config/presets/hard.toml b/src/yc_bench/config/presets/hard.toml index 4a5c734..180c0f0 100644 --- a/src/yc_bench/config/presets/hard.toml +++ b/src/yc_bench/config/presets/hard.toml @@ -44,8 +44,8 @@ initial_funds_cents = 10_000_000 # $100,000 — must reach prestige 3 by mont # Inherits num_employees=10, num_market_tasks=200 from default. # Tight deadlines: 2000/220 = 9.1 days. -# 1 task with 5 per domain → 8.7 days. Just fits. -# 2 concurrent tasks → 17.4 days. Guaranteed miss. +# With domain specialization (some employees have 0 in some domains), +# effective team per domain is smaller — completion time varies by task. deadline_qty_per_day = 220.0 # Stiff penalties — mistakes cost real prestige. diff --git a/src/yc_bench/config/presets/medium.toml b/src/yc_bench/config/presets/medium.toml index a5853cc..cd1c265 100644 --- a/src/yc_bench/config/presets/medium.toml +++ b/src/yc_bench/config/presets/medium.toml @@ -38,9 +38,7 @@ auto_advance_after_turns = 8 [world] # Inherits num_employees=10, num_market_tasks=200 from default. -# Deadline uses max per-domain qty. 1500/150 = 10 days. -# 1 task with 5 per domain → 6.5 days. Comfortable. -# 2 concurrent tasks → 13 days. Miss. +# Deadline: 1500/150 = 10 days. Moderate pressure. deadline_qty_per_day = 150.0 # Real penalties — failing costs prestige, cancelling costs more. diff --git a/src/yc_bench/config/presets/nightmare.toml b/src/yc_bench/config/presets/nightmare.toml index 97b4f48..13d0aec 100644 --- a/src/yc_bench/config/presets/nightmare.toml +++ b/src/yc_bench/config/presets/nightmare.toml @@ -53,8 +53,7 @@ initial_funds_cents = 8_000_000 # $80,000 — razor-thin runway # Inherits num_employees=10, num_market_tasks=200 from default. # Razor deadlines: 2500/220 = 11.4 days. -# 1 task with 5 per domain → 10.9 days. Barely fits. -# 2 concurrent tasks → 21.8 days. Guaranteed miss. +# With domain specialization, effective team is smaller — razor-tight. deadline_qty_per_day = 220.0 # Catastrophic penalties — there is no good exit from a bad accept. diff --git a/src/yc_bench/config/presets/tutorial.toml b/src/yc_bench/config/presets/tutorial.toml index 3b7f8c2..6e2e17b 100644 --- a/src/yc_bench/config/presets/tutorial.toml +++ b/src/yc_bench/config/presets/tutorial.toml @@ -31,7 +31,7 @@ auto_advance_after_turns = 5 initial_funds_cents = 25_000_000 # $250,000 — very forgiving buffer # Inherits num_employees=10, num_market_tasks=200 from default. -# Generous deadlines: 50 qty/day → mode task gets 12-day deadline. +# Generous deadlines: 600/50 = 12 days. Very comfortable. deadline_qty_per_day = 50.0 # Negligible penalties — mistakes barely hurt. diff --git a/src/yc_bench/core/handlers/task_complete.py b/src/yc_bench/core/handlers/task_complete.py index ced3820..7c17677 100644 --- a/src/yc_bench/core/handlers/task_complete.py +++ b/src/yc_bench/core/handlers/task_complete.py @@ -97,9 +97,11 @@ def handle_task_complete(db: Session, event: SimEvent, sim_time) -> TaskComplete EmployeeSkillRate.employee_id == a.employee_id, EmployeeSkillRate.domain == domain, ).one_or_none() - if skill is not None and skill.rate_domain_per_hour < wc.skill_rate_max: - boost = skill.rate_domain_per_hour * task.skill_boost_pct - skill.rate_domain_per_hour = min(wc.skill_rate_max, skill.rate_domain_per_hour + boost) + if skill is not None: + skill.rate_domain_per_hour = min( + skill.rate_domain_per_hour + task.skill_boost_pct, + Decimal(str(wc.skill_rate_max)), + ) # Salary bump: small raise for each employee who contributed to this task if wc.salary_bump_pct > 0: diff --git a/src/yc_bench/core/progress.py b/src/yc_bench/core/progress.py index 72bf1ef..774b053 100644 --- a/src/yc_bench/core/progress.py +++ b/src/yc_bench/core/progress.py @@ -65,7 +65,8 @@ def _rates_by_employee_domain(rates): m[(r.employee_id, r.domain)] = r.rate_domain_per_hour return m -def _effective_rate_for_task_domain(*, task_id, domain, assignments, assignment_counts, base_rates): +def _effective_rate_for_task_domain(*, task_id, domain, assignments, + assignment_counts, base_rates): total = Decimal("0") for a in assignments: if a.task_id != task_id: @@ -226,7 +227,7 @@ def compute_effective_rates(db, company_id): for a in assignments: assignments_by_task.setdefault(a.task_id, []).append(a) assignment_counts[a.employee_id] = assignment_counts.get(a.employee_id, 0) + 1 - + employee_ids = list(assignment_counts.keys()) skill_rows = db.query(EmployeeSkillRate).filter(EmployeeSkillRate.employee_id.in_(employee_ids)).all() @@ -243,7 +244,7 @@ def compute_effective_rates(db, company_id): continue base = base_rates.get((a.employee_id, req.domain), Decimal("0")) total += base / Decimal(k) - + out.append(EffectiveRate( task_id=req.task_id, domain=req.domain, diff --git a/src/yc_bench/services/generate_employees.py b/src/yc_bench/services/generate_employees.py index fc2e0fa..b919057 100644 --- a/src/yc_bench/services/generate_employees.py +++ b/src/yc_bench/services/generate_employees.py @@ -1,6 +1,5 @@ from __future__ import annotations -import math from dataclasses import dataclass from ..config.schema import WorldConfig @@ -18,9 +17,6 @@ _TIER_SEQUENCE = [ "senior", "senior", ] -_MIN_RATE = 1.0 -_MAX_RATE = 10.0 - @dataclass(frozen=True) class GeneratedEmployee: @@ -47,49 +43,9 @@ def _sample_salary_cents(rng, cfg, tier_name): return sample_right_skew_triangular_int(rng, tier.min_cents, tier.max_cents) -def _dirichlet_sample(rng, alpha, k): - """Sample from Dirichlet(alpha, ..., alpha) with k components.""" - raw = [rng.gammavariate(alpha, 1.0) for _ in range(k)] - total = sum(raw) - if total == 0: - return [1.0 / k] * k - return [x / total for x in raw] - - -def _distribute_rates(rng, avg_rate, dirichlet_alpha=0.3): - """Distribute a rate budget across domains with spiky concentration. - - Each domain gets at least _MIN_RATE. The extra budget is split via - Dirichlet(alpha) so that one or two domains can be dramatically higher - than the rest — a junior can secretly be a superstar in one domain. - Individual rates are capped at _MAX_RATE. - """ - total_budget = avg_rate * _NUM_DOMAINS - extra = total_budget - _NUM_DOMAINS * _MIN_RATE - - if extra <= 0: - return [_MIN_RATE] * _NUM_DOMAINS - - proportions = _dirichlet_sample(rng, dirichlet_alpha, _NUM_DOMAINS) - rates = [_MIN_RATE + extra * p for p in proportions] - - # Cap at _MAX_RATE and redistribute excess iteratively. - for _ in range(5): - overflow = 0.0 - uncapped = [] - for i in range(_NUM_DOMAINS): - if rates[i] > _MAX_RATE: - overflow += rates[i] - _MAX_RATE - rates[i] = _MAX_RATE - else: - uncapped.append(i) - if overflow <= 0 or not uncapped: - break - share = overflow / len(uncapped) - for i in uncapped: - rates[i] += share - - return [round(r, 4) for r in rates] +def _sample_domain_rates(rng, max_rate): + """Sample each domain's rate independently from 0 to max_rate.""" + return [round(rng.uniform(0, max_rate), 4) for _ in range(_NUM_DOMAINS)] def generate_employees(*, run_seed, count, cfg=None): @@ -112,10 +68,7 @@ def generate_employees(*, run_seed, count, cfg=None): tier_name = tiers[idx - 1] tier_cfg = _tier_by_name(cfg, tier_name) - # Sample average rate uniformly within the tier's range. - avg_rate = rng.uniform(tier_cfg.rate_min, tier_cfg.rate_max) - - domain_rates = _distribute_rates(rng, avg_rate) + domain_rates = _sample_domain_rates(rng, max_rate=tier_cfg.rate_max) rates = dict(zip(_ALL_DOMAINS, domain_rates)) employees.append(