diff --git a/src/yc_bench/agent/loop.py b/src/yc_bench/agent/loop.py index 7c192e9..575104a 100644 --- a/src/yc_bench/agent/loop.py +++ b/src/yc_bench/agent/loop.py @@ -167,21 +167,37 @@ def run_agent_loop( commands_executed = _extract_commands(result.raw_result) resume_payload = result.resume_payload + # Ignore blocked sim resume responses (ok=False means no time actually advanced) + if resume_payload is not None and not resume_payload.get("ok", True): + logger.info("Turn %d: sim resume was blocked (no active tasks).", turn_num) + resume_payload = None if result.checkpoint_advanced and resume_payload is not None: logger.info("Turn %d: agent called sim resume.", turn_num) turns_since_resume = 0 else: turns_since_resume += 1 if command_executor is not None and turns_since_resume >= auto_advance_after_turns: - logger.info( - "Turn %d: %d consecutive turns without sim resume; auto-advancing.", - turn_num, turns_since_resume, - ) - resume_payload, err = _auto_resume(command_executor) - if err: - logger.warning("Auto-resume failed on turn %d: %s", turn_num, err) + # Only auto-advance if there are active tasks (employees working). + # When idle (no active tasks), advancing just burns payroll with + # zero productivity — let the agent keep planning instead. 
+ with db_factory() as db: + idle_snapshot = _snapshot_state(db, company_id) + has_active = idle_snapshot["active_tasks"] > 0 + if has_active: + logger.info( + "Turn %d: %d consecutive turns without sim resume; auto-advancing.", + turn_num, turns_since_resume, + ) + resume_payload, err = _auto_resume(command_executor) + if err: + logger.warning("Auto-resume failed on turn %d: %s", turn_num, err) + else: + turns_since_resume = 0 else: - turns_since_resume = 0 + logger.info( + "Turn %d: %d turns without resume but no active tasks; skipping auto-advance.", + turn_num, turns_since_resume, + ) if resume_payload is not None: # Query full state so the agent sees active/planned task counts diff --git a/src/yc_bench/agent/prompt.py b/src/yc_bench/agent/prompt.py index 812c073..b97b906 100644 --- a/src/yc_bench/agent/prompt.py +++ b/src/yc_bench/agent/prompt.py @@ -42,8 +42,8 @@ Your goal is to maximize company prestige and funds over the simulation horizon ## Strategy Guidelines -1. **Check company status first** to understand your financial position and runway. -2. **Browse the market** for tasks you can accept (check prestige requirements). +1. **Check company status first** to understand your financial position, runway, and **current prestige levels per domain**. +2. **Browse the market at your prestige level** — use `--required-prestige-lte N` where N matches your highest prestige. Higher-prestige tasks pay significantly more (prestige-5 tasks pay ~2.2x more than prestige-1). As your prestige grows, ALWAYS increase your browse filter to find better-paying tasks. 3. **Accept tasks** that match your capabilities and offer good reward-to-risk ratio. 4. **Assign employees strategically** — employees split throughput across active tasks. Focus employees on fewer tasks for faster completion. 5. **Dispatch tasks** once assigned, then continue monitoring progress/events via status and reports. 
@@ -51,6 +51,7 @@ Your goal is to maximize company prestige and funds over the simulation horizon 7. **Watch payroll** — monthly salaries are deducted automatically. Don't let runway drop to zero. 8. **Use status checks** to track critical milestones and risks. 9. **Successful tasks** award funds + prestige + employee skill boosts. Build momentum. +10. **Scale up over time** — regularly check `yc-bench company status` to see your prestige. Browse higher-prestige tasks as you grow — staying on prestige-1 tasks when you have prestige 5+ leaves enormous revenue on the table. ## Key Rules @@ -199,12 +200,13 @@ def build_initial_user_prompt( "", "**Your immediate priority**: generate revenue before payroll drains your runway.", "You MUST complete these steps now (multiple commands per turn are fine):", - "1. `yc-bench market browse --required-prestige-lte 1` — find tasks you can accept", - "2. `yc-bench task accept --task-id ` — accept 2-3 suitable tasks", - "3. `yc-bench employee list` — get employee IDs", - "4. `yc-bench task assign --task-id --employee-id ` — assign employees", - "5. `yc-bench task dispatch --task-id ` — start work on each assigned task", - "6. `yc-bench sim resume` — advance time to collect the first task completion event", + "1. `yc-bench company status` — check your current prestige levels", + "2. `yc-bench market browse` — find tasks you can accept (use `--required-prestige-lte N` matching your prestige)", + "3. `yc-bench task accept --task-id ` — accept 2-3 suitable tasks", + "4. `yc-bench employee list` — get employee IDs", + "5. `yc-bench task assign --task-id --employee-id ` — assign employees", + "6. `yc-bench task dispatch --task-id ` — start work on each assigned task", + "7. `yc-bench sim resume` — advance time to collect the first task completion event", "", "Do not spend multiple turns just browsing. 
Accept and dispatch tasks immediately.", ] diff --git a/src/yc_bench/cli/sim_commands.py b/src/yc_bench/cli/sim_commands.py index e6bc997..5ceb818 100644 --- a/src/yc_bench/cli/sim_commands.py +++ b/src/yc_bench/cli/sim_commands.py @@ -101,6 +101,40 @@ def sim_resume(): error_output("No simulation found. Run `yc-bench sim init` first.") company = db.query(Company).filter(Company.id == sim_state.company_id).one() + # Block sim resume when no active tasks — advancing idle burns payroll + from sqlalchemy import func + from ..db.models.task import Task, TaskStatus + active_count = db.query(func.count(Task.id)).filter( + Task.company_id == sim_state.company_id, + Task.status == TaskStatus.ACTIVE, + ).scalar() or 0 + + if active_count == 0: + planned_count = db.query(func.count(Task.id)).filter( + Task.company_id == sim_state.company_id, + Task.status == TaskStatus.PLANNED, + ).scalar() or 0 + if planned_count > 0: + json_output({ + "ok": False, + "error": "BLOCKED: You have planned tasks but none are dispatched (active). " + "Assign employees and run `yc-bench task dispatch --task-id ` " + "before calling sim resume. Advancing time now would waste runway.", + "active_tasks": 0, + "planned_tasks": planned_count, + }) + return + else: + json_output({ + "ok": False, + "error": "BLOCKED: No active tasks. Advancing time with no work in progress " + "just burns payroll. 
Accept a task, assign employees, dispatch it, " + "THEN call sim resume.", + "active_tasks": 0, + "planned_tasks": 0, + }) + return + next_event = fetch_next_event( db=db, company_id=sim_state.company_id, diff --git a/src/yc_bench/core/handlers/task_complete.py b/src/yc_bench/core/handlers/task_complete.py index 7c17677..517b4c1 100644 --- a/src/yc_bench/core/handlers/task_complete.py +++ b/src/yc_bench/core/handlers/task_complete.py @@ -98,8 +98,9 @@ def handle_task_complete(db: Session, event: SimEvent, sim_time) -> TaskComplete EmployeeSkillRate.domain == domain, ).one_or_none() if skill is not None: + boost = skill.rate_domain_per_hour * task.skill_boost_pct skill.rate_domain_per_hour = min( - skill.rate_domain_per_hour + task.skill_boost_pct, + skill.rate_domain_per_hour + boost, Decimal(str(wc.skill_rate_max)), ) diff --git a/src/yc_bench/services/generate_employees.py b/src/yc_bench/services/generate_employees.py index b919057..0c3f8a9 100644 --- a/src/yc_bench/services/generate_employees.py +++ b/src/yc_bench/services/generate_employees.py @@ -43,9 +43,9 @@ def _sample_salary_cents(rng, cfg, tier_name): return sample_right_skew_triangular_int(rng, tier.min_cents, tier.max_cents) -def _sample_domain_rates(rng, max_rate): - """Sample each domain's rate independently from 0 to max_rate.""" - return [round(rng.uniform(0, max_rate), 4) for _ in range(_NUM_DOMAINS)] +def _sample_domain_rates(rng, min_rate, max_rate): + """Sample each domain's rate independently from min_rate to max_rate.""" + return [round(rng.uniform(min_rate, max_rate), 4) for _ in range(_NUM_DOMAINS)] def generate_employees(*, run_seed, count, cfg=None): @@ -68,7 +68,7 @@ def generate_employees(*, run_seed, count, cfg=None): tier_name = tiers[idx - 1] tier_cfg = _tier_by_name(cfg, tier_name) - domain_rates = _sample_domain_rates(rng, max_rate=tier_cfg.rate_max) + domain_rates = _sample_domain_rates(rng, min_rate=tier_cfg.rate_min, max_rate=tier_cfg.rate_max) rates = dict(zip(_ALL_DOMAINS, 
domain_rates)) employees.append( diff --git a/system_design/00_overview.md b/system_design/00_overview.md index 1d1a7d0..775c10d 100644 --- a/system_design/00_overview.md +++ b/system_design/00_overview.md @@ -12,6 +12,7 @@ An LLM agent is dropped into the role of CEO of a small AI startup. It must: - Assign employees to tasks across 4 technical domains - Manage cash flow (payroll, rewards, penalties) - Build prestige in each domain to unlock higher-tier tasks +- Build trust with clients to unlock better payouts and reduced work requirements - Survive until the simulation horizon ends without going bankrupt ## Key Metrics (~4,975 lines of Python) @@ -96,3 +97,4 @@ An LLM agent is dropped into the role of CEO of a small AI startup. It must: | [08_cli_interface.md](08_cli_interface.md) | CLI command groups and JSON output | | [09_configuration.md](09_configuration.md) | Config schema, presets, and world generation | | [10_runner_orchestration.md](10_runner_orchestration.md) | Benchmark runner, dashboard, and session | +| [11_client_trust.md](11_client_trust.md) | Client trust mechanics, tiers, and reward scaling | diff --git a/system_design/02_data_models.md b/system_design/02_data_models.md index 51e7847..e099739 100644 --- a/system_design/02_data_models.md +++ b/system_design/02_data_models.md @@ -28,8 +28,16 @@ The benchmark uses SQLAlchemy's declarative ORM over SQLite for several reasons: │ │ ├────<┌──────────┐────────┘ │ │ Task │────<┌─────────────────┐ - │ └──────────┘ │ TaskRequirement │ (1 per domain × task) - │ └─────────────────┘ + │ └────┬─────┘ │ TaskRequirement │ (1 per domain × task) + │ │ └─────────────────┘ + │ │ + │ └────>┌──────────┐ + │ │ Client │ (task issuer with hidden multiplier) + │ └────┬─────┘ + │ │ + ├────<┌───────────────┘ + │ │ ClientTrust │ (company ↔ client trust level) + │ └──────────────┘ │ ├────<┌──────────────┐ │ │ SimEvent │ (discrete events queue) @@ -132,6 +140,28 @@ The benchmark uses SQLAlchemy's declarative ORM over SQLite 
for several reasons: **Design choice**: Many-to-many junction table. An employee can work on multiple tasks (throughput splits), and a task can have multiple employees (parallel progress). +### Client (`models/client.py`) + +| Column | Type | Notes | +|--------|------|-------| +| `id` | UUID (PK) | Auto-generated | +| `name` | String(255) | Client company name (e.g. "Nexus AI") | +| `reward_multiplier` | Float | Hidden per-client bonus [0.7, 2.5], not shown to agent | +| `tier` | String(32) | Agent-visible label: Standard / Premium / Enterprise | +| `specialty_domains` | JSON | List of 1-2 domain strings (e.g. ["research", "training"]) | + +**Design choice**: The `reward_multiplier` is hidden from the agent; only `tier` is visible. This prevents trivially optimal strategy (always pick highest multiplier) and requires the agent to experiment and observe payouts. + +### ClientTrust (`models/client.py`) + +| Column | Type | Notes | +|--------|------|-------| +| `company_id` | UUID (FK, PK) | References Company | +| `client_id` | UUID (FK, PK) | References Client | +| `trust_level` | Numeric(6,3) | Range [0.0, 5.0], default 0.000 | + +**Design choice**: Composite primary key (company_id, client_id) — one trust level per company-client pair. Trust affects both reward scaling and work reduction. See [11_client_trust.md](11_client_trust.md) for full mechanics. + ### SimEvent (`models/event.py`) | Column | Type | Notes | diff --git a/system_design/03_task_system.md b/system_design/03_task_system.md index 95032be..73a39fd 100644 --- a/system_design/03_task_system.md +++ b/system_design/03_task_system.md @@ -51,6 +51,27 @@ def task_accept(task_id): **Design choice**: Prestige check is per-domain. A task requiring prestige 3.0 with requirements in `research` and `inference` needs prestige >= 3.0 in BOTH domains. This prevents gaming by maxing one domain. +### Trust Gating at Accept Time + +~20% of tasks have a `required_trust` field. 
At acceptance, the agent's trust with the task's client must meet the threshold: + +```python +if task.required_trust > 0 and task.client_id: + client_trust = get_trust(company_id, task.client_id) + if client_trust < task.required_trust: + reject("Insufficient trust with client") +``` + +**Design choice**: Trust gating is per-client, not global. High-trust tasks are the most valuable opportunities, gated behind relationship-building with specific clients. See [11_client_trust.md](11_client_trust.md) for full trust mechanics. + +### Client Assignment and Reward Scaling + +Each task belongs to a specific client. At acceptance: + +1. **Reward scaling**: `actual_reward = listed_reward × trust_multiplier` (50% at trust 0, scaling up with trust and client tier) +2. **Work reduction**: `required_qty *= (1 - trust_work_reduction_max × trust/trust_max)` (up to 40% less work at max trust) +3. **Replacement generation**: A new market task replaces the accepted one, biased toward the same client's specialty domains + ### Cancel Penalties Cancelling an active task incurs: @@ -142,3 +163,9 @@ The `market browse` command supports: - Pagination (offset/limit) All output is JSON for agent consumption. + +### Sim Resume Blocking + +`yc-bench sim resume` is **blocked** when there are zero active tasks, returning `{"ok": false}` instead of advancing time. This prevents catastrophic payroll drain when the agent has no work in progress. The agent loop filters blocked responses and treats them as no-ops. + +The auto-advance mechanism (which forces `sim resume` after N consecutive turns without one) also checks for active tasks before advancing. diff --git a/system_design/04_prestige_system.md b/system_design/04_prestige_system.md index 32ea160..1c9dc30 100644 --- a/system_design/04_prestige_system.md +++ b/system_design/04_prestige_system.md @@ -121,3 +121,14 @@ The prestige system creates several key strategic tensions: 4. **Accept vs. 
Defer**: Taking a task you might fail risks prestige loss; waiting risks decay These tensions make the benchmark more than just "do tasks fast" -- it tests genuine strategic reasoning. + +## Interaction with Client Trust + +Prestige and trust are complementary progression axes: + +- **Prestige** gates which tasks you *can access* (required_prestige per domain) +- **Trust** determines how *profitable* those tasks are (reward scaling + work reduction) +- **Client specialties** bridge the two: clients with specialties in your high-prestige domains offer tasks you can complete quickly, building trust faster +- **Domain alignment** creates a strategic lever: picking clients whose specialties match your prestige strengths compounds both progression axes + +See [11_client_trust.md](11_client_trust.md) for full trust mechanics. diff --git a/system_design/10_runner_orchestration.md b/system_design/10_runner_orchestration.md index ebcdfbf..619c7c9 100644 --- a/system_design/10_runner_orchestration.md +++ b/system_design/10_runner_orchestration.md @@ -134,6 +134,50 @@ class RunSession: **Design choice**: Session object encapsulates all run-specific state, making it easy to serialize and manage runs. 
+## Bot Runner Baselines (`scripts/bot_runner.py`) + +The bot runner provides deterministic heuristic baselines that operate under the **same constraints** as the LLM agent: + +- Same market visibility (browse limit of 50, prestige/trust gating) +- Same economic rules (trust multiplier, work reduction, payroll, salary bumps) +- Same sim resume blocking (no time advance without active tasks) +- Direct DB access (bypasses CLI parsing overhead but applies identical logic) + +### Available Strategies + +| Strategy | Selection Heuristic | +|----------|-------------------| +| `greedy` | Highest reward among accessible tasks | +| `random` | Random selection (deterministic via seeded RNG) | +| `throughput` | Highest reward per estimated completion hour | +| `prestige` | Phase 1 (prestige < 5): fastest prestige gain. Phase 2: throughput | + +### Greedy Baseline Design + +The greedy bot is the **"zero strategy" floor** that any competent LLM agent should beat: + +- **Sequential execution**: 1 task at a time (`MAX_CONCURRENT_TASKS = 1`) +- **1 task accepted per turn**: Mirrors the LLM's effective pace (browse → accept → assign → dispatch = ~1 task/turn) +- **All employees assigned**: Every employee works on the single active task +- **Prestige-aware browsing**: Filters market by `required_prestige <= floor(max_prestige)`, sorted by reward DESC +- **No completable filter**: All accessible tasks are candidates (blind to actual completion probability) +- **Tier-average rate estimation**: Uses `E[uniform(0, max_rate)]` per tier for ETA estimates (same information the LLM has) +- **Trust/prestige gating**: Respects the same acceptance requirements as the LLM + +**Design choice**: The greedy bot is intentionally simple — it has no workload management, no client strategy, no domain alignment, and no long-term planning. It picks the highest-paying task it can access and throws all resources at it. 
This makes it a reliable floor: if an LLM agent can't beat "always pick the biggest number," the agent isn't adding strategic value. + +### Usage + +```bash +# Single strategy/config/seed +uv run python scripts/bot_runner.py --bot greedy --config medium --seed 1 + +# All strategies × all configs × all seeds +uv run python scripts/bot_runner.py +``` + +Output is written to `results/yc_bench_result_{config}_{seed}_{bot_slug}.json` in the same format as LLM runs, enabling direct comparison in plots. + ## Batch Running (`scripts/`) ### Multi-Seed Runs diff --git a/system_design/11_client_trust.md b/system_design/11_client_trust.md new file mode 100644 index 0000000..459eee7 --- /dev/null +++ b/system_design/11_client_trust.md @@ -0,0 +1,194 @@ +# Client Trust System + +**Location**: `src/yc_bench/db/models/client.py`, `src/yc_bench/services/generate_clients.py`, `src/yc_bench/core/handlers/task_complete.py`, `src/yc_bench/cli/client_commands.py` + +## Overview + +Client trust is YC-Bench's second progression axis alongside prestige. While prestige gates *which tasks you can access*, trust determines *how profitable those tasks are*. Every task is offered by a specific client (e.g. "Nexus AI", "Vertex Labs"). Building trust with a client increases payouts and reduces work required, creating a compounding loop that rewards focused relationship-building over scattered effort. 
+ +## Design Goals + +The trust system was designed to create **genuine strategic diversity** where multiple strategies are viable and no single approach clearly dominates: + +| Strategy | Description | Risk | Ceiling | +|----------|-------------|------|---------| +| Domain-aligned focus | Pick clients whose specialties match prestige strengths | Low | Medium-High | +| High-tier gamble | Enterprise clients despite domain mismatch | High | Highest | +| Conservative | Standard-tier, right domains, profitable day 1 | Lowest | Medium | +| Diversified | 3-4 clients, broad coverage | Medium | Medium | +| Trust investor | Cheap tasks from high-tier to build trust early | Medium | High | + +## Clients + +### Generation (`generate_clients.py`) + +Clients are generated at world-seeding time with seeded RNG: + +- **Count**: 8 clients (configurable via `num_clients`) +- **Names**: Drawn from a pool of 15 AI company names (e.g. "Nexus AI", "Cipher Corp") +- **Reward multiplier**: `triangular(0.7, 2.5, mode=1.0)` — hidden from the agent +- **Tier**: Derived from multiplier (visible to the agent) +- **Specialty domains**: 1-2 domains per client (60% get 1, 40% get 2) + +### Tiers + +Tiers are the agent-visible proxy for the hidden reward multiplier: + +| Tier | Multiplier Range | Meaning | +|------|-----------------|---------| +| Standard | [0.7, 1.0) | Lower reward ceiling but safer early | +| Premium | [1.0, 1.7) | Moderate scaling | +| Enterprise | [1.7, 2.5] | Highest ceiling but requires high trust to be profitable | + +**Design choice**: The exact multiplier is hidden. The agent sees only the tier label via `yc-bench client list`. This prevents the trivial strategy of "always pick the highest multiplier" and requires experimentation to discover which clients are most valuable. + +### Specialty Domains + +Each client has 1-2 specialty domains (e.g. "research", "training"). 
Tasks from a client are biased toward their specialties: + +- **70% chance** the first domain requirement is a specialty domain +- **30% chance** it's random + +This creates domain alignment as a strategic lever — a Premium client whose specialties match your prestige strengths may outperform an Enterprise client in domains where you're weak. + +## Trust Mechanics + +### Trust Level + +Trust is tracked per (company, client) pair in the `ClientTrust` table. Range: [0.0, 5.0]. + +### Trust Gain (on task success) + +``` +gain = trust_gain_base × (1 - trust/trust_max)^trust_gain_diminishing_power +``` + +Default parameters: +- `trust_gain_base`: 0.40 +- `trust_gain_diminishing_power`: 1.5 +- `trust_max`: 5.0 + +Diminishing returns mean early trust builds fast (~0.40 per task at trust 0) but slows significantly as trust approaches max (~0.04 per task at trust 4). + +### Trust Loss + +| Event | Penalty | +|-------|---------| +| Task failure (late) | -0.3 trust | +| Task cancellation | -0.5 trust | + +### Trust Decay + +Trust decays daily at `trust_decay_per_day` (default: 0.015/day). Inactive client relationships erode over time, requiring continued work to maintain. + +### Cross-Client Decay + +Completing a task for Client A reduces trust with *all other clients* by `trust_cross_client_decay` (default: 0.03). This models exclusivity pressure — clients notice when you spread attention thin. It penalizes scattered work and rewards focusing on 2-3 key clients. + +## Reward Scaling + +### Trust Reward Formula + +``` +actual_reward = listed_reward × trust_multiplier + +trust_multiplier = trust_base_multiplier + client_mult² × trust_reward_scale × trust² / trust_max +``` + +Default parameters: +- `trust_base_multiplier`: 0.50 (everyone starts at 50% of listed reward) +- `trust_reward_scale`: 0.25 +- `trust_max`: 5.0 + +At trust 0, all clients pay 50% of listed reward regardless of tier. 
At max trust: + +| Tier | Example Mult | Trust Multiplier at trust=5 | +|------|-------------|---------------------------| +| Standard | 0.85 | 0.50 + 0.72 × 0.25 × 5 = 1.40 | +| Premium | 1.3 | 0.50 + 1.69 × 0.25 × 5 = 2.61 | +| Enterprise | 2.0 | 0.50 + 4.0 × 0.25 × 5 = 5.50 | + +**Design choice**: The quadratic scaling on both multiplier and trust creates dramatic tier separation at high trust while keeping all clients roughly equivalent at low trust. Enterprise clients are actually *worse* than Standard at trust 0 (same 50% payout, but harder tasks due to specialty mismatch), making them a genuine investment gamble. + +### Work Reduction + +``` +work_reduction = trust_work_reduction_max × trust / trust_max +``` + +Default `trust_work_reduction_max`: 0.40 (up to 40% less work at max trust). + +Applied at task acceptance: each domain's `required_qty` is multiplied by `(1 - work_reduction)`. This compounds with higher rewards — at high trust you earn more in less time. + +**Design choice**: Work reduction represents "trusted clients give clearer specs." This creates the compounding loop: trust → less work → faster completion → more tasks per month → more trust → even better returns. + +## Trust Gating + +~20% of tasks have a `required_trust` field (sampled from `triangular(1, 5, mode=2)`). The agent cannot accept these tasks unless trust with the task's client meets the threshold. + +```python +if task.required_trust > 0: + if client_trust < task.required_trust: + reject("Insufficient trust with client") +``` + +**Design choice**: Trust-gated tasks are the highest-value opportunities. They ensure that building trust is not just about better payouts but also about unlocking premium work that's invisible to low-trust agents. 
+ +## Sim Resume Blocking + +To prevent catastrophic payroll drain when the agent has no active work, `sim resume` is **blocked** when there are zero active tasks: + +```python +# In sim_commands.py +if active_count == 0: + return {"ok": False, "error": "BLOCKED: No active tasks..."} +``` + +The agent loop filters blocked responses (those with `ok: False`) and treats them as no-ops rather than time advances. The auto-advance mechanism in the loop also checks for active tasks before forcing time forward. + +**Design choice**: Without this guard, an LLM agent calling `sim resume` while idle would skip months of payroll with zero revenue — a catastrophic and unrecoverable error. The block forces the agent to accept/dispatch work before time can advance. + +## Agent Visibility + +The agent sees the following via `yc-bench client list`: + +```json +{ + "client_id": "uuid", + "name": "Nexus AI", + "trust_level": 1.234, + "tier": "Enterprise", + "specialties": ["research", "training"] +} +``` + +**Not visible**: exact reward multiplier, trust formula parameters, cross-client decay rate. + +Tasks in `market browse` show `client_name` and `required_trust`. The agent must infer client value by observing actual payouts over time. 
+ +## Configuration + +All trust parameters are in `WorldConfig` (see `config/schema.py`): + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `num_clients` | 8 | Number of clients | +| `trust_max` | 5.0 | Maximum trust level | +| `trust_min` | 0.0 | Minimum trust level | +| `trust_gain_base` | 0.40 | Base trust gain per success | +| `trust_gain_diminishing_power` | 1.5 | Diminishing returns exponent | +| `trust_fail_penalty` | 0.3 | Trust lost on task failure | +| `trust_cancel_penalty` | 0.5 | Trust lost on task cancellation | +| `trust_decay_per_day` | 0.015 | Daily trust decay | +| `trust_cross_client_decay` | 0.03 | Trust erosion with other clients per task | +| `trust_base_multiplier` | 0.50 | Starting reward fraction (all clients) | +| `trust_reward_scale` | 0.25 | Trust reward scaling factor | +| `trust_work_reduction_max` | 0.40 | Max work reduction at max trust | + +## Strategic Implications + +1. **Focus vs. Diversify**: Cross-client decay penalizes spreading thin, but relying on one client is risky if their specialty doesn't match your prestige growth +2. **Tier vs. Domain**: An Enterprise client in the wrong domain may underperform a Premium client in the right domain +3. **Early vs. Late**: Standard clients are more profitable early (same 50% payout, less specialty mismatch), while Enterprise clients only shine at high trust +4. **Trust as Investment**: Early tasks for a high-tier client are effectively loss-leaders — you earn below-market rates to build a relationship that compounds later +5. **Hidden Information**: The agent must experiment and observe payouts to discover which clients are truly valuable, creating an exploration-exploitation tradeoff