diff --git a/scripts/bot_runner.py b/scripts/bot_runner.py index eb4cdcb..01f608d 100644 --- a/scripts/bot_runner.py +++ b/scripts/bot_runner.py @@ -1,11 +1,17 @@ """Bot runner: plays YC-Bench using direct DB access with pluggable strategies. Strategies: - greedy — pick highest reward among completable tasks - random — pick randomly among completable tasks (deterministic via RngStreams) - throughput — pick highest reward/hour among completable tasks + greedy — pick highest reward among accessible tasks + random — pick randomly among accessible tasks (deterministic via RngStreams) + throughput — pick highest reward/hour among accessible tasks prestige — phase 1: climb prestige fast, phase 2: throughput +The bot operates under the same constraints as the LLM agent: + - Same market visibility (browse limit, prestige/trust gating) + - Same economic rules (trust multiplier, work reduction, payroll, salary bumps) + - Runs multiple concurrent tasks (like the LLM agent does) + - Must have active tasks before time advances (same as LLM sim resume block) + Usage: uv run python scripts/bot_runner.py # all bots, all configs, all seeds uv run python scripts/bot_runner.py --bot greedy # just greedy @@ -40,16 +46,13 @@ from yc_bench.services.generate_tasks import generate_replacement_task from yc_bench.services.rng import RngStreams from yc_bench.services.seed_world import SeedWorldRequest, seed_world_transactional -# Bot throughput cap: the bot has zero overhead per task (no API calls, no -# thinking) so it can complete ~3000 tasks/year. Real LLMs average ~4.5–5.2 -# No artificial task cap. The bot is subject to the same economic dynamics -# as the LLM: salary bumps, trust system, and payroll naturally limit -# throughput and profitability. - CONFIGS = ["medium", "hard", "nightmare"] SEEDS = [1, 2, 3] -MAX_TASK_CYCLES = None # No cap — bot plays until horizon end +# Baseline runs 1 task at a time — simple sequential greedy with no +# workload management. 
This is the "zero strategy" floor that any +# competent LLM agent should beat. +MAX_CONCURRENT_TASKS = 1 @dataclass @@ -97,7 +100,7 @@ def _compute_deadline(accepted_at, max_domain_qty, cfg): return add_business_hours(accepted_at, Decimal(str(biz_days)) * Decimal(str(work_hours))) -def _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers): +def _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers, n_active=0): """Build CandidateTask list from the same limited market window the LLM sees. Mirrors the LLM's constraints: @@ -112,7 +115,7 @@ def _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers): CompanyPrestige.company_id == company_id ).all() prestige_map = {p.domain: float(p.prestige_level) for p in prestige_rows} - min_prestige = min(prestige_map.values()) if prestige_map else 1.0 + max_prestige = max(prestige_map.values()) if prestige_map else 1.0 # Build trust map for trust requirement checks trust_rows = db.query(ClientTrust).filter( @@ -120,27 +123,21 @@ def _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers): ).all() trust_map = {str(ct.client_id): float(ct.trust_level) for ct in trust_rows} - # Same limited view as LLM's `market browse` — paginate in chunks like - # an LLM would (browse, check, offset to next page if nothing good). + # Browse market with prestige filter (same as LLM's `market browse --required-prestige-lte N`). + # Then paginate within accessible tasks, limited to browse_limit per page. 
browse_limit = world_cfg.market_browse_default_limit # default: 50 - total_market = db.query(Task).filter(Task.status == TaskStatus.MARKET).count() - market_tasks = [] - for page_offset in range(0, total_market, browse_limit): - page = ( - db.query(Task) - .filter(Task.status == TaskStatus.MARKET) - .order_by(Task.reward_funds_cents.desc()) - .offset(page_offset) - .limit(browse_limit) - .all() + # Use floor of max prestige as filter (greedy: take best available at current level) + prestige_filter = int(max_prestige) + market_tasks = ( + db.query(Task) + .filter( + Task.status == TaskStatus.MARKET, + Task.required_prestige <= prestige_filter, ) - market_tasks.extend(page) - # Stop paginating once we have enough accessible tasks (greedy LLM - # would stop browsing once it finds good options) - accessible = [t for t in page if t.required_trust == 0 or - trust_map.get(str(t.client_id), 0.0) >= t.required_trust] - if accessible: - break + .order_by(Task.reward_funds_cents.desc()) + .limit(browse_limit) + .all() + ) candidates = [] for task in market_tasks: @@ -162,26 +159,20 @@ def _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers): if client_trust < task.required_trust: continue - max_domain_qty = max(float(r.required_qty) for r in reqs) task_reqs = [{"domain": r.domain, "required_qty": float(r.required_qty)} for r in reqs] - - completion_hours = estimate_completion_hours(task_reqs, employee_tiers, n_concurrent_tasks=1) - - is_completable = False - if completion_hours is not None: - deadline = _compute_deadline(sim_state.sim_time, max_domain_qty, world_cfg) - completion_time = add_business_hours(sim_state.sim_time, completion_hours) - is_completable = completion_time <= deadline + # Estimate hours accounting for concurrent task split + concurrent = max(1, n_active + 1) + completion_hours = estimate_completion_hours(task_reqs, employee_tiers, n_concurrent_tasks=concurrent) candidates.append(CandidateTask( task=task, 
reward_cents=task.reward_funds_cents, prestige_delta=float(task.reward_prestige_delta), completion_hours=completion_hours if completion_hours is not None else Decimal("999999"), - is_completable=is_completable, + is_completable=True, # Always accessible = always a candidate )) - return candidates, min_prestige + return candidates, max_prestige # ── Strategy functions ────────────────────────────────────────────────────── @@ -189,42 +180,40 @@ def _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers): StrategyFn = Callable # (completable: list[CandidateTask], context: dict) -> Optional[CandidateTask] -def strategy_greedy(completable: list[CandidateTask], context: dict) -> Optional[CandidateTask]: +def strategy_greedy(candidates: list[CandidateTask], context: dict) -> Optional[CandidateTask]: """Pick the task with the highest reward.""" - if not completable: + if not candidates: return None - return max(completable, key=lambda c: c.reward_cents) + return max(candidates, key=lambda c: c.reward_cents) -def strategy_random(completable: list[CandidateTask], context: dict) -> Optional[CandidateTask]: - """Pick a random completable task (deterministic via seeded RNG).""" - if not completable: +def strategy_random(candidates: list[CandidateTask], context: dict) -> Optional[CandidateTask]: + """Pick a random accessible task (deterministic via seeded RNG).""" + if not candidates: return None seed = context["seed"] turn = context["turn"] rng = RngStreams(seed).stream(f"bot_random_select:{turn}") - return rng.choice(completable) + return rng.choice(candidates) -def strategy_throughput(completable: list[CandidateTask], context: dict) -> Optional[CandidateTask]: +def strategy_throughput(candidates: list[CandidateTask], context: dict) -> Optional[CandidateTask]: """Pick the task with the highest reward per hour.""" - if not completable: + if not candidates: return None - return max(completable, key=lambda c: Decimal(c.reward_cents) / c.completion_hours) + return 
max(candidates, key=lambda c: Decimal(c.reward_cents) / c.completion_hours) -def strategy_prestige(completable: list[CandidateTask], context: dict) -> Optional[CandidateTask]: - """Phase 1 (prestige < 5): climb prestige fastest. Phase 2: throughput.""" - if not completable: +def strategy_prestige(candidates: list[CandidateTask], context: dict) -> Optional[CandidateTask]: + """Phase 1 (prestige < 5): climb prestige fast. Phase 2: throughput.""" + if not candidates: return None current_prestige = context["max_prestige"] if current_prestige < 5: - # Prefer tasks that give prestige delta per hour of work - prestige_tasks = [c for c in completable if c.prestige_delta > 0] + prestige_tasks = [c for c in candidates if c.prestige_delta > 0] if prestige_tasks: return max(prestige_tasks, key=lambda c: Decimal(str(c.prestige_delta)) / c.completion_hours) - # Fall back to throughput - return max(completable, key=lambda c: Decimal(c.reward_cents) / c.completion_hours) + return max(candidates, key=lambda c: Decimal(c.reward_cents) / c.completion_hours) STRATEGIES = { @@ -293,7 +282,6 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn) tasks_completed = 0 tasks_failed = 0 - task_cycles_used = 0 turn = 0 while True: @@ -308,160 +296,168 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn) if sim_state.sim_time >= sim_state.horizon_end: break - active_tasks = db.query(Task).filter( + active_count = db.query(Task).filter( Task.company_id == company_id, Task.status == TaskStatus.ACTIVE, - ).all() + ).count() - if active_tasks: - next_event = fetch_next_event(db, company_id, sim_state.horizon_end) - if next_event is None: - break - adv = advance_time(db, company_id, next_event.scheduled_at) - for we in adv.wake_events: - if we.get("type") == "task_completed": - if we.get("success"): - tasks_completed += 1 - else: - tasks_failed += 1 - if adv.bankrupt or adv.horizon_reached: - break - continue + # Accept up to 1 new task 
per turn (same pace as LLM agent). + # The LLM spends multiple tool calls to browse/accept/assign/dispatch + # one task, so it effectively accepts ~1 per turn. + newly_accepted = [] + while active_count + len(newly_accepted) < MAX_CONCURRENT_TASKS and len(newly_accepted) < 1: + employees = db.query(Employee).filter(Employee.company_id == company_id).all() + employee_tiers = [emp.tier for emp in employees] + employee_ids = [emp.id for emp in employees] - # Get employees — only tier info (same as LLM agent sees) - employees = db.query(Employee).filter(Employee.company_id == company_id).all() - employee_tiers = [emp.tier for emp in employees] - employee_ids = [emp.id for emp in employees] - - candidates, max_prestige = _build_candidates(db, company_id, sim_state, world_cfg, employee_tiers) - completable = [c for c in candidates if c.is_completable] - - context = { - "seed": seed, - "turn": turn, - "max_prestige": max_prestige, - } - chosen = strategy_fn(completable, context) - - if chosen is None: - next_event = fetch_next_event(db, company_id, sim_state.horizon_end) - if next_event is None: - adv = advance_time(db, company_id, sim_state.horizon_end) - break - adv = advance_time(db, company_id, next_event.scheduled_at) - if adv.bankrupt or adv.horizon_reached: - break - continue - - best_task = chosen.task - - # Accept the task - reqs = db.query(TaskRequirement).filter( - TaskRequirement.task_id == best_task.id - ).all() - - # Apply trust reward multiplier and work reduction (same formula as CLI task accept) - if best_task.client_id is not None: - from yc_bench.db.models.client import Client, ClientTrust - client_row = db.query(Client).filter(Client.id == best_task.client_id).one_or_none() - client_multiplier = client_row.reward_multiplier if client_row else 1.0 - ct = db.query(ClientTrust).filter( - ClientTrust.company_id == company_id, - ClientTrust.client_id == best_task.client_id, - ).one_or_none() - trust_level = float(ct.trust_level) if ct else 0.0 - # Reward 
multiplier - trust_multiplier = ( - world_cfg.trust_base_multiplier - + (client_multiplier ** 2) * world_cfg.trust_reward_scale - * (trust_level ** 2) / world_cfg.trust_max + n_will_be_active = active_count + len(newly_accepted) + candidates, max_prestige = _build_candidates( + db, company_id, sim_state, world_cfg, employee_tiers, + n_active=n_will_be_active, ) - best_task.reward_funds_cents = int(best_task.reward_funds_cents * trust_multiplier) - # Work reduction: trusted clients → less work required - work_reduction = world_cfg.trust_work_reduction_max * (trust_level / world_cfg.trust_max) - for r in reqs: - r.required_qty = int(float(r.required_qty) * (1 - work_reduction)) - max_domain_qty = max(float(r.required_qty) for r in reqs) + context = { + "seed": seed, + "turn": turn + len(newly_accepted), # vary context per pick + "max_prestige": max_prestige, + } + chosen = strategy_fn(candidates, context) + if chosen is None: + break - best_task.status = TaskStatus.PLANNED - best_task.company_id = company_id - best_task.accepted_at = sim_state.sim_time - best_task.deadline = _compute_deadline(sim_state.sim_time, max_domain_qty, world_cfg) + task = chosen.task + newly_accepted.append(task.id) - # Generate replacement (same logic as CLI task accept) - counter = sim_state.replenish_counter - sim_state.replenish_counter = counter + 1 + # Accept the task — same logic as CLI task accept + reqs = db.query(TaskRequirement).filter( + TaskRequirement.task_id == task.id + ).all() + + # Apply trust reward multiplier and work reduction + if task.client_id is not None: + from yc_bench.db.models.client import Client, ClientTrust + client_row = db.query(Client).filter(Client.id == task.client_id).one_or_none() + client_multiplier = client_row.reward_multiplier if client_row else 1.0 + ct = db.query(ClientTrust).filter( + ClientTrust.company_id == company_id, + ClientTrust.client_id == task.client_id, + ).one_or_none() + trust_level = float(ct.trust_level) if ct else 0.0 + 
trust_multiplier = ( + world_cfg.trust_base_multiplier + + (client_multiplier ** 2) * world_cfg.trust_reward_scale + * (trust_level ** 2) / world_cfg.trust_max + ) + task.reward_funds_cents = int(task.reward_funds_cents * trust_multiplier) + work_reduction = world_cfg.trust_work_reduction_max * (trust_level / world_cfg.trust_max) + for r in reqs: + r.required_qty = int(float(r.required_qty) * (1 - work_reduction)) + + max_domain_qty = max(float(r.required_qty) for r in reqs) + + task.status = TaskStatus.PLANNED + task.company_id = company_id + task.accepted_at = sim_state.sim_time + task.deadline = _compute_deadline(sim_state.sim_time, max_domain_qty, world_cfg) + + # Generate replacement + counter = sim_state.replenish_counter + sim_state.replenish_counter = counter + 1 + + from yc_bench.db.models.client import Client as ClientModel + replaced_client_index = 0 + if task.client_id is not None: + clients = db.query(ClientModel).order_by(ClientModel.name).all() + for i, c in enumerate(clients): + if c.id == task.client_id: + replaced_client_index = i + break + + replacement_spec_domains = None + if task.client_id is not None: + orig_client = db.query(ClientModel).filter(ClientModel.id == task.client_id).one_or_none() + if orig_client: + replacement_spec_domains = orig_client.specialty_domains + + replacement = generate_replacement_task( + run_seed=sim_state.run_seed, + replenish_counter=counter, + replaced_prestige=task.required_prestige, + replaced_client_index=replaced_client_index, + cfg=world_cfg, + specialty_domains=replacement_spec_domains, + ) - from yc_bench.db.models.client import Client as ClientModel - replaced_client_index = 0 - if best_task.client_id is not None: clients = db.query(ClientModel).order_by(ClientModel.name).all() - for i, c in enumerate(clients): - if c.id == best_task.client_id: - replaced_client_index = i - break + replacement_client = clients[replacement.client_index % len(clients)] if clients else None + replacement_client_id = 
replacement_client.id if replacement_client else None - # Get specialty domains for the replacement client - replacement_spec_domains = None - if best_task.client_id is not None: - orig_client = db.query(ClientModel).filter(ClientModel.id == best_task.client_id).one_or_none() - if orig_client: - replacement_spec_domains = orig_client.specialty_domains + replacement_row = Task( + id=uuid4(), + company_id=None, + client_id=replacement_client_id, + status=TaskStatus.MARKET, + title=replacement.title, + required_prestige=replacement.required_prestige, + reward_funds_cents=replacement.reward_funds_cents, + reward_prestige_delta=replacement.reward_prestige_delta, + skill_boost_pct=replacement.skill_boost_pct, + accepted_at=None, deadline=None, completed_at=None, + success=None, progress_milestone_pct=0, + required_trust=replacement.required_trust, + ) + db.add(replacement_row) + for domain, qty in replacement.requirements.items(): + db.add(TaskRequirement( + task_id=replacement_row.id, + domain=domain, + required_qty=qty, + completed_qty=0, + )) - replacement = generate_replacement_task( - run_seed=sim_state.run_seed, - replenish_counter=counter, - replaced_prestige=best_task.required_prestige, - replaced_client_index=replaced_client_index, - cfg=world_cfg, - specialty_domains=replacement_spec_domains, - ) + # Assign ALL employees to this task + for eid in employee_ids: + db.add(TaskAssignment( + task_id=task.id, + employee_id=eid, + assigned_at=sim_state.sim_time, + )) + db.flush() - clients = db.query(ClientModel).order_by(ClientModel.name).all() - replacement_client = clients[replacement.client_index % len(clients)] if clients else None - replacement_client_id = replacement_client.id if replacement_client else None + task.status = TaskStatus.ACTIVE + db.flush() - replacement_row = Task( - id=uuid4(), - company_id=None, - client_id=replacement_client_id, - status=TaskStatus.MARKET, - title=replacement.title, - required_prestige=replacement.required_prestige, - 
reward_funds_cents=replacement.reward_funds_cents, - reward_prestige_delta=replacement.reward_prestige_delta, - skill_boost_pct=replacement.skill_boost_pct, - accepted_at=None, deadline=None, completed_at=None, - success=None, progress_milestone_pct=0, - required_trust=replacement.required_trust, - ) - db.add(replacement_row) - for domain, qty in replacement.requirements.items(): - db.add(TaskRequirement( - task_id=replacement_row.id, - domain=domain, - required_qty=qty, - completed_qty=0, - )) + # Recalculate ETAs for all newly accepted tasks + if newly_accepted: + recalculate_etas(db, company_id, sim_state.sim_time, + impacted_task_ids=set(newly_accepted), + milestones=world_cfg.task_progress_milestones) - # Assign ALL employees - for eid in employee_ids: - db.add(TaskAssignment( - task_id=best_task.id, - employee_id=eid, - assigned_at=sim_state.sim_time, - )) - db.flush() + # Now advance time (only if we have active tasks) + total_active = active_count + len(newly_accepted) + if total_active == 0: + # No accessible tasks at all — advance to next event to let + # prestige/trust change, then try again. 
+ next_event = fetch_next_event(db, company_id, sim_state.horizon_end) + if next_event is None: + break + adv = advance_time(db, company_id, next_event.scheduled_at) + if adv.bankrupt or adv.horizon_reached: + break + continue - best_task.status = TaskStatus.ACTIVE - db.flush() - - recalculate_etas(db, company_id, sim_state.sim_time, - impacted_task_ids={best_task.id}, - milestones=world_cfg.task_progress_milestones) - - task_cycles_used += 1 + next_event = fetch_next_event(db, company_id, sim_state.horizon_end) + if next_event is None: + break + adv = advance_time(db, company_id, next_event.scheduled_at) + for we in adv.wake_events: + if we.get("type") == "task_completed": + if we.get("success"): + tasks_completed += 1 + else: + tasks_failed += 1 + if adv.bankrupt or adv.horizon_reached: + break # Final state + extract time series for plotting diff --git a/src/yc_bench/agent/prompt.py b/src/yc_bench/agent/prompt.py index b97b906..1e0b0e8 100644 --- a/src/yc_bench/agent/prompt.py +++ b/src/yc_bench/agent/prompt.py @@ -59,7 +59,7 @@ Your goal is to maximize company prestige and funds over the simulation horizon - Task completion after deadline = failure (0.8x prestige penalty, no reward, trust penalty) - Task cancellation = 1.2x prestige penalty per domain + trust penalty (worse than failure) - Employee throughput = base_rate / number_of_active_tasks_assigned -- Time advances only when you run `yc-bench sim resume` +- Time advances only when you run `yc-bench sim resume`. **Note**: `sim resume` is blocked if you have no active (dispatched) tasks — you must accept, assign, and dispatch at least one task before time can advance. - Prestige is clamped [1, 10]. Funds are in cents. ## Client Trust @@ -149,13 +149,13 @@ def build_turn_context( if active_tasks == 0 and planned_tasks == 0: parts.append( "\n**ACTION REQUIRED**: No tasks are running. " - "Accept a task, assign employees to it, dispatch it, then call `yc-bench sim resume`. 
" - "Do this now — every turn without active tasks burns runway." + "`sim resume` is BLOCKED until you have active tasks. " + "Accept a task, assign employees to it, and dispatch it now." ) elif planned_tasks > 0 and active_tasks == 0: parts.append( "\n**ACTION REQUIRED**: You have planned tasks but none are dispatched. " - "Assign employees and dispatch now, then call `yc-bench sim resume`." + "`sim resume` is BLOCKED until you dispatch. Assign employees and dispatch now." ) else: parts.append("\nDecide your next actions. Use `run_command` to execute CLI commands.") diff --git a/src/yc_bench/config/presets/default.toml b/src/yc_bench/config/presets/default.toml index a1e0c78..52bd696 100644 --- a/src/yc_bench/config/presets/default.toml +++ b/src/yc_bench/config/presets/default.toml @@ -77,6 +77,22 @@ penalty_cancel_multiplier = 2.0 # hardened: was 1.2 # At 0.55: a prestige-8 task pays ~4.85x more than a prestige-1 task. reward_prestige_scale = 0.55 # hardened: was 0.3 +# --- Client trust --- +# trust_build_rate: ~tasks to reach 80% max trust (higher = slower) +# trust_fragility: 0-1, how punishing failures/inactivity are +# trust_focus_pressure: 0-1, penalty for spreading work across clients +# trust_reward_ceiling: payout multiplier a Premium client gives at max trust +# trust_work_reduction_max: max work reduction at max trust +# trust_gating_fraction: fraction of tasks that require established trust +num_clients = 8 +trust_max = 5.0 +trust_build_rate = 20.0 +trust_fragility = 0.5 +trust_focus_pressure = 0.5 +trust_reward_ceiling = 2.6 +trust_work_reduction_max = 0.40 +trust_gating_fraction = 0.20 + # Daily prestige decay per domain. Domains not exercised lose prestige # over time: -0.005/day → -0.15/month. Untouched domain drops ~1 level # every ~6 months. Prevents single-domain hyper-specialization. 
diff --git a/src/yc_bench/config/presets/easy.toml b/src/yc_bench/config/presets/easy.toml index 8457754..2e619a5 100644 --- a/src/yc_bench/config/presets/easy.toml +++ b/src/yc_bench/config/presets/easy.toml @@ -44,6 +44,14 @@ salary_bump_pct = 0.005 # Low reward scaling — prestige climbing not yet necessary. reward_prestige_scale = 0.3 +# --- Client trust (forgiving: builds fairly fast, mild penalties) --- +trust_build_rate = 15.0 +trust_fragility = 0.3 +trust_focus_pressure = 0.3 +trust_reward_ceiling = 2.8 +trust_work_reduction_max = 0.40 +trust_gating_fraction = 0.15 + [world.dist.required_prestige] type = "triangular" low = 1 diff --git a/src/yc_bench/config/presets/hard.toml b/src/yc_bench/config/presets/hard.toml index 180c0f0..34ce080 100644 --- a/src/yc_bench/config/presets/hard.toml +++ b/src/yc_bench/config/presets/hard.toml @@ -58,6 +58,14 @@ salary_bump_pct = 0.01 # High-prestige tasks pay substantially more. reward_prestige_scale = 0.55 +# --- Client trust (harsh: slow build, heavy penalties, lower ceiling) --- +trust_build_rate = 25.0 +trust_fragility = 0.7 +trust_focus_pressure = 0.7 +trust_reward_ceiling = 2.4 +trust_work_reduction_max = 0.35 +trust_gating_fraction = 0.25 + [world.dist.required_prestige] type = "triangular" low = 1 diff --git a/src/yc_bench/config/presets/medium.toml b/src/yc_bench/config/presets/medium.toml index cd1c265..6b39db5 100644 --- a/src/yc_bench/config/presets/medium.toml +++ b/src/yc_bench/config/presets/medium.toml @@ -51,6 +51,14 @@ salary_bump_pct = 0.01 # Prestige scaling starting to reward climbing. 
reward_prestige_scale = 0.45 +# --- Client trust (balanced: default build speed, moderate penalties) --- +trust_build_rate = 20.0 +trust_fragility = 0.5 +trust_focus_pressure = 0.5 +trust_reward_ceiling = 2.6 +trust_work_reduction_max = 0.40 +trust_gating_fraction = 0.20 + [world.dist.required_prestige] type = "triangular" low = 1 diff --git a/src/yc_bench/config/presets/nightmare.toml b/src/yc_bench/config/presets/nightmare.toml index 13d0aec..576a149 100644 --- a/src/yc_bench/config/presets/nightmare.toml +++ b/src/yc_bench/config/presets/nightmare.toml @@ -67,6 +67,14 @@ salary_bump_pct = 0.02 # This is what makes the prestige climb existentially necessary. reward_prestige_scale = 0.7 +# --- Client trust (brutal: very slow build, severe penalties, tight ceiling) --- +trust_build_rate = 30.0 +trust_fragility = 0.9 +trust_focus_pressure = 0.8 +trust_reward_ceiling = 2.2 +trust_work_reduction_max = 0.30 +trust_gating_fraction = 0.30 + [world.dist.required_prestige] type = "triangular" low = 1 diff --git a/src/yc_bench/config/presets/tutorial.toml b/src/yc_bench/config/presets/tutorial.toml index 6e2e17b..649dda6 100644 --- a/src/yc_bench/config/presets/tutorial.toml +++ b/src/yc_bench/config/presets/tutorial.toml @@ -44,6 +44,14 @@ salary_bump_pct = 0.0 # Mild reward scaling — no need to climb prestige. reward_prestige_scale = 0.2 +# --- Client trust (very forgiving: builds fast, low penalties, generous payoff) --- +trust_build_rate = 10.0 +trust_fragility = 0.2 +trust_focus_pressure = 0.2 +trust_reward_ceiling = 3.0 +trust_work_reduction_max = 0.40 +trust_gating_fraction = 0.10 + [world.dist.required_prestige] type = "constant" value = 1 # ALL tasks are prestige-1 — no gating at all. diff --git a/src/yc_bench/config/schema.py b/src/yc_bench/config/schema.py index 6473c1f..5c03f97 100644 --- a/src/yc_bench/config/schema.py +++ b/src/yc_bench/config/schema.py @@ -136,19 +136,44 @@ class WorldConfig(BaseModel): # every ~3 months. Floored at prestige_min. 
prestige_decay_per_day: float = 0.005 - # --- Client trust --- + # --- Client trust (intuitive knobs) --- num_clients: int = 8 trust_max: float = 5.0 + # ~how many successful tasks to reach 80% of max trust with one client + trust_build_rate: float = 20.0 + # 0-1: how punishing failures/inactivity are (0=forgiving, 1=harsh) + trust_fragility: float = 0.5 + # 0-1: how much working for one client hurts trust with others (0=none, 1=heavy) + trust_focus_pressure: float = 0.5 + # payout multiplier a typical Premium client (mult≈1.3) gives at max trust + trust_reward_ceiling: float = 2.6 + # max work reduction at max trust (0.4 = 40% less work) + trust_work_reduction_max: float = 0.40 + # fraction of tasks that require trust (~0.2 = 20%) + trust_gating_fraction: float = 0.20 + + # --- Derived trust params (computed from knobs above, do not set directly) --- trust_min: float = 0.0 - trust_gain_base: float = 0.40 + trust_gain_base: float = 0.0 trust_gain_diminishing_power: float = 1.5 - trust_fail_penalty: float = 0.3 - trust_cancel_penalty: float = 0.5 - trust_decay_per_day: float = 0.015 - trust_cross_client_decay: float = 0.03 # completing work for Client A erodes trust with other clients - trust_base_multiplier: float = 0.50 # all clients start at 50% of listed reward - trust_reward_scale: float = 0.25 # reward = listed × (base + client_mult² × scale × trust²/trust_max) - trust_work_reduction_max: float = 0.40 # trusted clients give clearer specs → up to 40% less work at max trust + trust_fail_penalty: float = 0.0 + trust_cancel_penalty: float = 0.0 + trust_decay_per_day: float = 0.0 + trust_cross_client_decay: float = 0.0 + trust_base_multiplier: float = 0.50 + trust_reward_scale: float = 0.0 + trust_reward_threshold: float = 0.0 + trust_reward_ramp: float = 0.0 + trust_level_reward_scale: float = 3.0 + trust_level_max_required: int = 4 + trust_gated_reward_boost: float = 0.15 + client_reward_mult_low: float = 0.7 + client_reward_mult_high: float = 2.5 + 
client_reward_mult_mode: float = 1.0 + client_single_specialty_prob: float = 0.6 + client_tier_premium_threshold: float = 1.0 + client_tier_enterprise_threshold: float = 1.7 + task_specialty_domain_bias: float = 0.7 # Required qty scaling by prestige: qty *= 1 + prestige_qty_scale * (prestige - 1). # At 0.3: prestige-5 tasks need 2.2× the work of prestige-1 tasks. @@ -191,6 +216,45 @@ class WorldConfig(BaseModel): ) ) + @model_validator(mode="after") + def _derive_trust_params(self) -> WorldConfig: + """Derive detailed trust parameters from the intuitive knobs. + + Derivation preserves default behavior: trust_build_rate=20, fragility=0.5, + focus_pressure=0.5, reward_ceiling=2.6 produce the same values as the + original hardcoded defaults. + """ + # trust_build_rate → gain_base + # Approximate: gain_base ≈ trust_max × 1.6 / build_rate + # At default (20): 5.0 × 1.6 / 20 = 0.40 + self.trust_gain_base = self.trust_max * 1.6 / self.trust_build_rate + + # trust_fragility → fail_penalty, cancel_penalty, decay_per_day + # At 0.5: fail=0.3, cancel=0.5, decay=0.015 + self.trust_fail_penalty = self.trust_fragility * 0.6 + self.trust_cancel_penalty = self.trust_fragility * 1.0 + self.trust_decay_per_day = self.trust_fragility * 0.03 + + # trust_focus_pressure → cross_client_decay + # At 0.5: cross_client_decay = 0.03 + self.trust_cross_client_decay = self.trust_focus_pressure * 0.06 + + # trust_reward_ceiling → reward_scale + # ceiling = base_multiplier + ref_mult² × scale × trust_max + # Using Premium reference (mult≈1.3): scale = (ceiling - 0.50) / (1.69 × trust_max) + ref_mult_sq = 1.69 # 1.3² + self.trust_reward_scale = ( + (self.trust_reward_ceiling - self.trust_base_multiplier) + / (ref_mult_sq * self.trust_max) + ) + + # trust_gating_fraction → threshold + ramp + # At 0.2: threshold=0.6, ramp=0.4 (top 40% CAN require, effective ~20%) + self.trust_reward_threshold = max(0.0, 1.0 - 2.0 * self.trust_gating_fraction) + self.trust_reward_ramp = min(1.0, 2.0 * 
self.trust_gating_fraction) + + return self + @model_validator(mode="after") def _salary_shares_sum_to_one(self) -> WorldConfig: total = self.salary_junior.share + self.salary_mid.share + self.salary_senior.share diff --git a/src/yc_bench/services/generate_clients.py b/src/yc_bench/services/generate_clients.py index 2a1b13d..a87cac8 100644 --- a/src/yc_bench/services/generate_clients.py +++ b/src/yc_bench/services/generate_clients.py @@ -2,6 +2,7 @@ from __future__ import annotations from dataclasses import dataclass, field +from ..config.schema import WorldConfig from ..db.models.company import Domain from .rng import RngStreams @@ -26,16 +27,11 @@ _CLIENT_NAME_POOL = [ _ALL_DOMAINS = list(Domain) -def _tier_from_multiplier(mult: float) -> str: - """Map reward multiplier to a visible tier label. - - Standard: [0.7, 1.0) - Premium: [1.0, 1.7) - Enterprise: [1.7, 2.5] - """ - if mult < 1.0: +def _tier_from_multiplier(mult: float, cfg: WorldConfig) -> str: + """Map reward multiplier to a visible tier label.""" + if mult < cfg.client_tier_premium_threshold: return "Standard" - if mult < 1.7: + if mult < cfg.client_tier_enterprise_threshold: return "Premium" return "Enterprise" @@ -48,12 +44,10 @@ class GeneratedClient: specialty_domains: list[str] = field(default_factory=list) -def generate_clients(*, run_seed: int, count: int) -> list[GeneratedClient]: - """Generate clients with seeded reward multipliers, tiers, and specialty domains. - - Multipliers range from 0.7 to 2.5 (triangular, mode 1.0). - Each client gets 1-2 specialty domains (60% get 1, 40% get 2). 
- """ +def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig | None = None) -> list[GeneratedClient]: + """Generate clients with seeded reward multipliers, tiers, and specialty domains.""" + if cfg is None: + cfg = WorldConfig() if count <= 0: return [] if count > len(_CLIENT_NAME_POOL): @@ -64,10 +58,10 @@ def generate_clients(*, run_seed: int, count: int) -> list[GeneratedClient]: names = rng.sample(_CLIENT_NAME_POOL, count) clients = [] for name in names: - mult = round(rng.triangular(0.7, 2.5, 1.0), 2) - tier = _tier_from_multiplier(mult) - # 60% chance of 1 specialty, 40% chance of 2 - n_specialties = 1 if rng.random() < 0.6 else 2 + mult = round(rng.triangular(cfg.client_reward_mult_low, cfg.client_reward_mult_high, + cfg.client_reward_mult_mode), 2) + tier = _tier_from_multiplier(mult, cfg) + n_specialties = 1 if rng.random() < cfg.client_single_specialty_prob else 2 specialties = [d.value for d in rng.sample(_ALL_DOMAINS, n_specialties)] clients.append(GeneratedClient( name=name, diff --git a/src/yc_bench/services/generate_tasks.py b/src/yc_bench/services/generate_tasks.py index a0626c5..458819c 100644 --- a/src/yc_bench/services/generate_tasks.py +++ b/src/yc_bench/services/generate_tasks.py @@ -64,10 +64,10 @@ def _sample_required_qty(rng, cfg): return int(sample_from_spec(rng, cfg.dist.required_qty)) -def _sample_domains_with_bias(rng, k, specialty_domains=None): +def _sample_domains_with_bias(rng, k, specialty_domains=None, specialty_bias=0.7): """Sample k domains, biased toward client specialties. - First domain pick: 70% chance of being a specialty (if specialties exist). + First domain pick: specialty_bias chance of being a specialty (if specialties exist). Remaining picks: uniform random from remaining domains. 
""" if not specialty_domains or k <= 0: @@ -76,9 +76,9 @@ def _sample_domains_with_bias(rng, k, specialty_domains=None): picked = [] available = list(_ALL_DOMAINS) - # First pick: 70% specialty bias + # First pick: specialty bias specialty_enums = [d for d in _ALL_DOMAINS if d.value in specialty_domains] - if specialty_enums and rng.random() < 0.7: + if specialty_enums and rng.random() < specialty_bias: first = rng.choice(specialty_enums) else: first = rng.choice(available) @@ -95,7 +95,8 @@ def _sample_domains_with_bias(rng, k, specialty_domains=None): def _sample_requirements(rng, cfg, prestige=1, specialty_domains=None): k = _sample_domain_count(rng, cfg) - picked_domains = _sample_domains_with_bias(rng, k, specialty_domains=specialty_domains) + picked_domains = _sample_domains_with_bias(rng, k, specialty_domains=specialty_domains, + specialty_bias=cfg.task_specialty_domain_bias) scale = 1 + cfg.prestige_qty_scale * (prestige - 1) return {domain: int(_sample_required_qty(rng, cfg) * scale) for domain in picked_domains} @@ -116,22 +117,20 @@ def _required_trust_from_reward(rng, cfg, reward_cents): reward_frac = min(1.0, (reward_cents - reward_floor) / (reward_ceiling - reward_floor)) - # Only premium tasks (top ~30%) require trust. Clients reserve their - # best projects for proven vendors; routine work is open to anyone. - trust_prob = max(0.0, (reward_frac - 0.6) / 0.4) # 0 below 60th pct, ramps to 1.0 + # Only premium tasks (top portion) require trust. 
+ trust_prob = max(0.0, (reward_frac - cfg.trust_reward_threshold) / cfg.trust_reward_ramp) if rng.random() >= trust_prob: return 0 - # Trust level required: 1 at threshold, up to 4 for top tasks - return max(1, min(int(1 + reward_frac * 3), 4)) + # Trust level required: 1 at threshold, up to max for top tasks + return max(1, min(int(1 + reward_frac * cfg.trust_level_reward_scale), cfg.trust_level_max_required)) def _make_task(rng, cfg, prestige, serial, requirements, client_index=0): reward = _sample_reward_funds_cents(rng, cfg, prestige=prestige) required_trust = _required_trust_from_reward(rng, cfg, reward) - # Trust-gated tasks get a reward boost (premium projects pay more) if required_trust > 0: - reward = int(reward * (1.0 + 0.15 * required_trust)) + reward = int(reward * (1.0 + cfg.trust_gated_reward_boost * required_trust)) return GeneratedTask( title=f"Task-{serial}", required_prestige=prestige, diff --git a/src/yc_bench/services/seed_world.py b/src/yc_bench/services/seed_world.py index 71c7769..a6b066b 100644 --- a/src/yc_bench/services/seed_world.py +++ b/src/yc_bench/services/seed_world.py @@ -83,7 +83,7 @@ def _seed_employees(db, company, req): def _seed_clients(db, company, req): """Create Client rows and ClientTrust rows (all starting at 0.0).""" - generated = generate_clients(run_seed=req.run_seed, count=req.cfg.num_clients) + generated = generate_clients(run_seed=req.run_seed, count=req.cfg.num_clients, cfg=req.cfg) clients = [] for gc in generated: client = Client(id=uuid4(), name=gc.name, reward_multiplier=gc.reward_multiplier, diff --git a/system_design/11_client_trust.md b/system_design/11_client_trust.md index 459eee7..19b805b 100644 --- a/system_design/11_client_trust.md +++ b/system_design/11_client_trust.md @@ -1,194 +1,120 @@ # Client Trust System -**Location**: `src/yc_bench/db/models/client.py`, `src/yc_bench/services/generate_clients.py`, `src/yc_bench/core/handlers/task_complete.py`, `src/yc_bench/cli/client_commands.py` +**Location**: 
`services/generate_clients.py`, `services/generate_tasks.py`, `core/handlers/task_complete.py`, `cli/task_commands.py` ## Overview -Client trust is YC-Bench's second progression axis alongside prestige. While prestige gates *which tasks you can access*, trust determines *how profitable those tasks are*. Every task is offered by a specific client (e.g. "Nexus AI", "Vertex Labs"). Building trust with a client increases payouts and reduces work required, creating a compounding loop that rewards focused relationship-building over scattered effort. - -## Design Goals - -The trust system was designed to create **genuine strategic diversity** where multiple strategies are viable and no single approach clearly dominates: - -| Strategy | Description | Risk | Ceiling | -|----------|-------------|------|---------| -| Domain-aligned focus | Pick clients whose specialties match prestige strengths | Low | Medium-High | -| High-tier gamble | Enterprise clients despite domain mismatch | High | Highest | -| Conservative | Standard-tier, right domains, profitable day 1 | Lowest | Medium | -| Diversified | 3-4 clients, broad coverage | Medium | Medium | -| Trust investor | Cheap tasks from high-tier to build trust early | Medium | High | - -## Clients - -### Generation (`generate_clients.py`) - -Clients are generated at world-seeding time with seeded RNG: - -- **Count**: 8 clients (configurable via `num_clients`) -- **Names**: Drawn from a pool of 15 AI company names (e.g. 
"Nexus AI", "Cipher Corp") -- **Reward multiplier**: `triangular(0.7, 2.5, mode=1.0)` — hidden from the agent -- **Tier**: Derived from multiplier (visible to the agent) -- **Specialty domains**: 1-2 domains per client (60% get 1, 40% get 2) - -### Tiers - -Tiers are the agent-visible proxy for the hidden reward multiplier: - -| Tier | Multiplier Range | Meaning | -|------|-----------------|---------| -| Standard | [0.7, 1.0) | Lower reward ceiling but safer early | -| Premium | [1.0, 1.7) | Moderate scaling | -| Enterprise | [1.7, 2.5] | Highest ceiling but requires high trust to be profitable | - -**Design choice**: The exact multiplier is hidden. The agent sees only the tier label via `yc-bench client list`. This prevents the trivial strategy of "always pick the highest multiplier" and requires experimentation to discover which clients are most valuable. - -### Specialty Domains - -Each client has 1-2 specialty domains (e.g. "research", "training"). Tasks from a client are biased toward their specialties: - -- **70% chance** the first domain requirement is a specialty domain -- **30% chance** it's random - -This creates domain alignment as a strategic lever — a Premium client whose specialties match your prestige strengths may outperform an Enterprise client in domains where you're weak. - -## Trust Mechanics - -### Trust Level - -Trust is tracked per (company, client) pair in the `ClientTrust` table. Range: [0.0, 5.0]. - -### Trust Gain (on task success) - -``` -gain = trust_gain_base × (1 - trust/trust_max)^trust_gain_diminishing_power -``` - -Default parameters: -- `trust_gain_base`: 0.40 -- `trust_gain_diminishing_power`: 1.5 -- `trust_max`: 5.0 - -Diminishing returns mean early trust builds fast (~0.40 per task at trust 0) but slows significantly as trust approaches max (~0.07 per task at trust 4). 
- -### Trust Loss - -| Event | Penalty | -|-------|---------| -| Task failure (late) | -0.3 trust | -| Task cancellation | -0.5 trust | - -### Trust Decay - -Trust decays daily at `trust_decay_per_day` (default: 0.015/day). Inactive client relationships erode over time, requiring continued work to maintain. - -### Cross-Client Decay - -Completing a task for Client A reduces trust with *all other clients* by `trust_cross_client_decay` (default: 0.03). This models exclusivity pressure — clients notice when you spread attention thin. It penalizes scattered work and rewards focusing on 2-3 key clients. - -## Reward Scaling - -### Trust Reward Formula - -``` -actual_reward = listed_reward × trust_multiplier - -trust_multiplier = trust_base_multiplier + client_mult² × trust_reward_scale × trust² / trust_max -``` - -Default parameters: -- `trust_base_multiplier`: 0.50 (everyone starts at 50% of listed reward) -- `trust_reward_scale`: 0.25 -- `trust_max`: 5.0 - -At trust 0, all clients pay 50% of listed reward regardless of tier. At max trust: - -| Tier | Example Mult | Trust Multiplier at trust=5 | -|------|-------------|---------------------------| -| Standard | 0.85 | 0.50 + 0.72 × 0.25 × 5 = 1.40 | -| Premium | 1.3 | 0.50 + 1.69 × 0.25 × 5 = 2.61 | -| Enterprise | 2.0 | 0.50 + 4.0 × 0.25 × 5 = 5.50 | - -**Design choice**: The quadratic scaling on both multiplier and trust creates dramatic tier separation at high trust while keeping all clients roughly equivalent at low trust. Enterprise clients are actually *worse* than Standard at trust 0 (same 50% payout, but harder tasks due to specialty mismatch), making them a genuine investment gamble. - -### Work Reduction - -``` -work_reduction = trust_work_reduction_max × trust / trust_max -``` - -Default `trust_work_reduction_max`: 0.40 (up to 40% less work at max trust). - -Applied at task acceptance: each domain's `required_qty` is multiplied by `(1 - work_reduction)`. 
This compounds with higher rewards — at high trust you earn more in less time. - -**Design choice**: Work reduction represents "trusted clients give clearer specs." This creates the compounding loop: trust → less work → faster completion → more tasks per month → more trust → even better returns. - -## Trust Gating - -~20% of tasks have a `required_trust` field (sampled from `triangular(1, 5, mode=2)`). The agent cannot accept these tasks unless trust with the task's client meets the threshold. - -```python -if task.required_trust > 0: - if client_trust < task.required_trust: - reject("Insufficient trust with client") -``` - -**Design choice**: Trust-gated tasks are the highest-value opportunities. They ensure that building trust is not just about better payouts but also about unlocking premium work that's invisible to low-trust agents. - -## Sim Resume Blocking - -To prevent catastrophic payroll drain when the agent has no active work, `sim resume` is **blocked** when there are zero active tasks: - -```python -# In sim_commands.py -if active_count == 0: - return {"ok": False, "error": "BLOCKED: No active tasks..."} -``` - -The agent loop filters blocked responses (those with `ok: False`) and treats them as no-ops rather than time advances. The auto-advance mechanism in the loop also checks for active tasks before forcing time forward. - -**Design choice**: Without this guard, an LLM agent calling `sim resume` while idle would skip months of payroll with zero revenue — a catastrophic and unrecoverable error. The block forces the agent to accept/dispatch work before time can advance. - -## Agent Visibility - -The agent sees the following via `yc-bench client list`: - -```json -{ - "client_id": "uuid", - "name": "Nexus AI", - "trust_level": 1.234, - "tier": "Enterprise", - "specialties": ["research", "training"] -} -``` - -**Not visible**: exact reward multiplier, trust formula parameters, cross-client decay rate. 
- -Tasks in `market browse` show `client_name` and `required_trust`. The agent must infer client value by observing actual payouts over time. +Trust is the second progression axis alongside prestige. Prestige gates task access; trust determines profitability. Every task belongs to a client. Building trust increases payouts and reduces work, rewarding focused relationship-building. ## Configuration -All trust parameters are in `WorldConfig` (see `config/schema.py`): +The trust system is controlled by `num_clients` plus **7 intuitive `trust_*` knobs** in `WorldConfig`. All internal parameters are derived automatically. -| Parameter | Default | Description | -|-----------|---------|-------------| -| `num_clients` | 8 | Number of clients | +| Knob | Default | Meaning | +|------|---------|---------| +| `num_clients` | 8 | Number of clients in the game | | `trust_max` | 5.0 | Maximum trust level | -| `trust_min` | 0.0 | Minimum trust level | -| `trust_gain_base` | 0.40 | Base trust gain per success | -| `trust_gain_diminishing_power` | 1.5 | Diminishing returns exponent | -| `trust_fail_penalty` | 0.3 | Trust lost on task failure | -| `trust_cancel_penalty` | 0.5 | Trust lost on task cancellation | -| `trust_decay_per_day` | 0.015 | Daily trust decay | -| `trust_cross_client_decay` | 0.03 | Trust erosion with other clients per task | -| `trust_base_multiplier` | 0.50 | Starting reward fraction (all clients) | -| `trust_reward_scale` | 0.25 | Trust reward scaling factor | -| `trust_work_reduction_max` | 0.40 | Max work reduction at max trust | +| `trust_build_rate` | 20.0 | ~tasks to reach 80% max trust with one client | +| `trust_fragility` | 0.5 | 0–1: how punishing failures/inactivity are | +| `trust_focus_pressure` | 0.5 | 0–1: penalty for spreading work across clients | +| `trust_reward_ceiling` | 2.6 | Payout multiplier a Premium client gives at max trust | +| `trust_work_reduction_max` | 0.40 | Max work reduction at max trust (40%) | +| `trust_gating_fraction` | 0.20 | Fraction of tasks that 
require trust (~20%) | -## Strategic Implications +### Derivation -1. **Focus vs. Diversify**: Cross-client decay penalizes spreading thin, but relying on one client is risky if their specialty doesn't match your prestige growth -2. **Tier vs. Domain**: An Enterprise client in the wrong domain may underperform a Premium client in the right domain -3. **Early vs. Late**: Standard clients are more profitable early (same 50% payout, less specialty mismatch), while Enterprise clients only shine at high trust -4. **Trust as Investment**: Early tasks for a high-tier client are effectively loss-leaders — you earn below-market rates to build a relationship that compounds later -5. **Hidden Information**: The agent must experiment and observe payouts to discover which clients are truly valuable, creating an exploration-exploitation tradeoff +These knobs derive all internal parameters via `_derive_trust_params()`: + +``` +gain_base = trust_max × 1.6 / trust_build_rate +fail_penalty = fragility × 0.6 +cancel_penalty = fragility × 1.0 +decay_per_day = fragility × 0.03 +cross_client_decay = focus_pressure × 0.06 +reward_scale = (reward_ceiling - 0.50) / (1.69 × trust_max) +reward_threshold = 1.0 - 2 × gating_fraction +reward_ramp = 2 × gating_fraction +``` + +## Client Generation + +At world-seeding time, `num_clients` clients are generated with: +- **Reward multiplier**: `triangular(0.7, 2.5, mode=1.0)` — hidden from agent +- **Tier** (visible): Standard `[0.7, 1.0)`, Premium `[1.0, 1.7)`, Enterprise `[1.7, 2.5]` +- **Specialties**: 1 domain (60%) or 2 domains (40%) + +## Task Domain Bias + +First domain pick has 70% chance of matching client specialty. Remaining domains uniform random. + +## Trust Gating + +High-reward tasks may require trust: + +``` +reward_frac = (reward - floor) / (ceiling - floor) +trust_prob = max(0, (reward_frac - threshold) / ramp) +level = clamp(1 + reward_frac × 3, 1, 4) +``` + +Trust-gated tasks get a 15% reward boost per required trust level. 
+ +**Why**: Clients reserve best projects for proven vendors. + +## Trust Reward Formula (at task accept) + +``` +trust_multiplier = 0.50 + client_mult² × reward_scale × trust² / trust_max +actual_reward = listed_reward × trust_multiplier +``` + +At trust 0, everyone gets 50% of listed reward. At max trust: + +| Tier | mult | multiplier | +|------|------|-----------| +| Standard | 0.85 | 1.40× | +| Premium | 1.30 | 2.60× | +| Enterprise | 2.00 | 5.47× | + +**Why**: Quadratic on both mult and trust creates dramatic tier separation at high trust. Enterprise is worse than Standard at trust 0 — a genuine investment gamble. + +## Work Reduction (at task accept) + +``` +work_reduction = trust_work_reduction_max × trust / trust_max +required_qty *= (1 - work_reduction) +``` + +**Why**: Trusted clients give clearer specs. Creates virtuous cycle: trust → less work → faster completion → more tasks → more trust. + +## Trust Gain (task success) + +``` +gain = gain_base × (1 - trust/trust_max) ^ 1.5 +``` + +Diminishing returns: ~0.40/task at trust 0, ~0.04/task at trust 4. + +## Trust Loss + +| Event | Penalty | +|-------|---------| +| Task failure | `fragility × 0.6` (default 0.3) | +| Task cancel | `fragility × 1.0` (default 0.5) | + +## Trust Decay + +- **Daily**: `fragility × 0.03` per day (default 0.015) +- **Cross-client**: `focus_pressure × 0.06` per completed task, applied to every other client (default 0.03) + +**Why**: Cross-client decay penalizes scattering and rewards focusing on 2–3 clients. + +## Sim Resume Blocking + +`sim resume` is blocked when there are no active tasks. Auto-advance also skips when idle. + +**Why**: Prevents LLM from burning months of payroll while doing nothing. + +## Agent Visibility + +Visible: client name, trust_level, tier, specialties. Not visible: exact multiplier, formulas, decay rates.