Capped skill rate at 10 + removed reward mult from clients

2026-04-19 12:58:03 +00:00 · 2026-03-16 16:09:17 -07:00 · 2026-03-16 16:09:17 -07:00 · 140bb58653
commit 140bb58653
parent d976b9cbb4
11 changed files with 64 additions and 148 deletions
--- a/scripts/bot_runner.py
+++ b/scripts/bot_runner.py
@ -256,6 +256,7 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
            horizon_years=cfg.sim.horizon_years,
            employee_count=world_cfg.num_employees,
            market_task_count=world_cfg.num_market_tasks,
+            cfg=world_cfg,
            start_date=start_dt,
        )
        result = seed_world_transactional(db, req)
@ -333,22 +334,14 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
                    TaskRequirement.task_id == task.id
                ).all()

-                # Apply trust reward multiplier and work reduction
+                # Apply trust work reduction (no reward multiplier)
                if task.client_id is not None:
-                    from yc_bench.db.models.client import Client, ClientTrust
-                    client_row = db.query(Client).filter(Client.id == task.client_id).one_or_none()
-                    client_multiplier = client_row.reward_multiplier if client_row else 1.0
+                    from yc_bench.db.models.client import ClientTrust
                    ct = db.query(ClientTrust).filter(
                        ClientTrust.company_id == company_id,
                        ClientTrust.client_id == task.client_id,
                    ).one_or_none()
                    trust_level = float(ct.trust_level) if ct else 0.0
-                    trust_multiplier = (
-                        world_cfg.trust_base_multiplier
-                        + (client_multiplier ** 2) * world_cfg.trust_reward_scale
-                        * (trust_level ** 2) / world_cfg.trust_max
-                    )
-                    task.reward_funds_cents = int(task.reward_funds_cents * trust_multiplier)
                    work_reduction = world_cfg.trust_work_reduction_max * (trust_level / world_cfg.trust_max)
                    for r in reqs:
                        r.required_qty = int(float(r.required_qty) * (1 - work_reduction))
--- a/src/yc_bench/agent/prompt.py
+++ b/src/yc_bench/agent/prompt.py
@ -40,19 +40,6 @@ Your goal is to maximize company prestige and funds over the simulation horizon
 - `yc-bench task cancel --task-id <UUID> --reason "text"` — cancel a task (prestige penalty: 1.2x reward delta)
 - `yc-bench sim resume` — advance simulation to the next checkpoint event and return wake events

-## Strategy Guidelines
-
-1. **Check company status first** to understand your financial position, runway, and **current prestige levels per domain**.
-2. **Browse the market at your prestige level** — use `--required-prestige-lte N` where N matches your highest prestige. Higher-prestige tasks pay significantly more (prestige-5 tasks pay ~2.2x more than prestige-1). As your prestige grows, ALWAYS increase your browse filter to find better-paying tasks.
-3. **Accept tasks** that match your capabilities and offer good reward-to-risk ratio.
-4. **Assign employees strategically** — employees split throughput across active tasks. Focus employees on fewer tasks for faster completion.
-5. **Dispatch tasks** once assigned, then continue monitoring progress/events via status and reports.
-6. **Monitor deadlines** — completing after deadline causes failure (0.8x prestige penalty). Cancel hopeless tasks early (1.2x penalty, but stops bleeding time).
-7. **Watch payroll** — monthly salaries are deducted automatically. Don't let runway drop to zero.
-8. **Use status checks** to track critical milestones and risks.
-9. **Successful tasks** award funds + prestige + employee skill boosts. Build momentum.
-10. **Scale up over time** — regularly check `yc-bench company status` to see your prestige. Browse higher-prestige tasks as you grow — staying on prestige-1 tasks when you have prestige 5+ leaves enormous revenue on the table.
-
 ## Key Rules

 - Task completion at or before deadline = success (reward funds + prestige + skill boost + client trust gain)
@ -65,31 +52,14 @@ Your goal is to maximize company prestige and funds over the simulation horizon
 ## Client Trust

 - Each task is offered by a specific **client** (e.g. "Nexus AI", "Vertex Labs").
- Trust affects TWO things: **reward** and **work required**.
+- Each client has **specialty domains** (e.g. "research", "training"). Tasks from a client are biased toward their specialties.
+- Use `yc-bench client list` to see each client's specialties and current trust level.

-### Client Tiers and Specialties
- Clients are classified into **tiers**: Standard, Premium, Enterprise. Higher tiers have higher reward potential at high trust.
- Each client has **specialty domains** (e.g. "research", "training"). Tasks from a client are biased toward their specialties — a client specializing in "research" will mostly offer research-heavy tasks.
- Use `yc-bench client list` to see each client's tier, specialties, and current trust level.
-
-### Reward Scaling
- Listed rewards are **potential** — actual payout depends on trust. Without trust, you only receive about **50%** of the listed reward.
- As trust grows, payouts increase significantly. Higher-tier clients scale better but start worse.
- Observe actual payouts over time to gauge each client's true value.
-
-### Work Reduction
- **Trusted clients give clearer specs** — work required shrinks with trust (up to 40% reduction at max trust).
- This compounds with higher rewards: at high trust, you earn more in less time.
-
-### Strategy
- **Domain alignment matters most**: Pick clients whose specialties match your company's prestige strengths. A Premium client aligned with your best domains may outperform an Enterprise client where you're weak.
- **Focus on 2-3 clients** to build trust fast. Scattered work = perpetual low payouts + full work load.
- **Compounding loop**: trust → less work → faster completion → more tasks per month → more trust → even better returns.
- **Higher-tier clients are riskier early**: they pay less than Standard clients at low trust, but become very rewarding at high trust. Standard clients are safer to start with.
+### Mechanics
 - Completing tasks for a client builds **trust** [0.0–5.0]. Trust gains diminish as you approach max.
- **Premium tasks require trust**: High-reward tasks require established trust (required_trust 1-4). Clients don't give their best projects to unproven vendors.
- **Trust decays** daily — relationships need maintenance through continued work.
- **Failures hurt**: -0.3 trust. **Cancellations hurt more**: -0.5 trust.
+- Trusted clients require less work (up to 35% reduction at max trust).
+- Some tasks require minimum trust to accept (required_trust 1-4).
+- Trust decays daily. Task failure and cancellation reduce trust.
 """


--- a/src/yc_bench/cli/sim_commands.py
+++ b/src/yc_bench/cli/sim_commands.py
@ -57,6 +57,7 @@ def sim_init(
            horizon_years=horizon_years,
            employee_count=employee_count,
            market_task_count=market_task_count,
+            cfg=_wc,
            start_date=start_dt,
        )
        result = seed_world_transactional(db, req)
--- a/src/yc_bench/cli/task_commands.py
+++ b/src/yc_bench/cli/task_commands.py
@ -90,19 +90,10 @@ def task_accept(
                    f"does not meet task requirement ({task.required_trust})."
                )

-        # Apply trust reward multiplier and work reduction at accept time
+        # Apply trust work reduction at accept time (no reward multiplier —
+        # faster completion from trust already increases revenue via throughput).
        _cfg = _get_world_cfg()
        if task.client_id is not None:
-            client_row = db.query(Client).filter(Client.id == task.client_id).one_or_none()
-            client_multiplier = client_row.reward_multiplier if client_row else 1.0
-            # Reward: continuous formula
-            trust_multiplier = (
-                _cfg.trust_base_multiplier
-                + (client_multiplier ** 2) * _cfg.trust_reward_scale
-                * (trust_level ** 2) / _cfg.trust_max
-            )
-            task.reward_funds_cents = int(task.reward_funds_cents * trust_multiplier)
-            # Work reduction: trusted clients give clearer specs → less work
            work_reduction = _cfg.trust_work_reduction_max * (trust_level / _cfg.trust_max)
            for r in reqs:
                r.required_qty = int(float(r.required_qty) * (1 - work_reduction))
--- a/src/yc_bench/config/init.py
+++ b/src/yc_bench/config/init.py
@ -5,14 +5,8 @@ from .schema import ExperimentConfig, AgentConfig, LoopConfig, SimConfig, WorldC


 def get_world_config() -> WorldConfig:
-    """Load WorldConfig from the active experiment (YC_BENCH_EXPERIMENT env var, default: 'default').
-
-    Falls back to default WorldConfig if config loading fails (e.g. outside a benchmark run).
-    """
-    try:
-        return load_config(os.environ.get("YC_BENCH_EXPERIMENT", "default")).world
-    except Exception:
-        return WorldConfig()
+    """Load WorldConfig from the active experiment (YC_BENCH_EXPERIMENT env var, default: 'default')."""
+    return load_config(os.environ.get("YC_BENCH_EXPERIMENT", "default")).world


 __all__ = [
--- a/src/yc_bench/config/presets/default.toml
+++ b/src/yc_bench/config/presets/default.toml
@ -63,6 +63,8 @@ market_browse_default_limit  = 50
 # Salary bump per completed task — each assigned employee gets this raise.
 # Compounds over time, accelerating payroll pressure.
 salary_bump_pct = 0.01
+salary_max_cents = 10_000_000   # cap individual salary at $100K/month
+skill_rate_max   = 10.0         # cap employee skill rate

 # --- Prestige mechanics ---
 prestige_max  = 10.0
--- a/src/yc_bench/config/schema.py
+++ b/src/yc_bench/config/schema.py
@ -107,50 +107,44 @@ class SimConfig(BaseModel):
 # ---------------------------------------------------------------------------

 class WorldConfig(BaseModel):
+    """All world-generation parameters.
+
+    Most fields have no defaults — they must be set explicitly in the TOML preset.
+    This prevents silent drift between schema.py and the preset files. Exceptions that still default: e.g. trust_min, task_specialty_domain_bias, dist.
+    """
+
    # --- Workforce ---
-    num_employees: int = 10
-    initial_funds_cents: int = 15_000_000    # $150,000
-    initial_prestige_level: float = 1.0
-    work_hours_per_day: float = 9.0
+    num_employees: int
+    initial_funds_cents: int
+    initial_prestige_level: float
+    work_hours_per_day: float

    # --- Market ---
-    num_market_tasks: int = 500
-    market_browse_default_limit: int = 50
+    num_market_tasks: int
+    market_browse_default_limit: int

    # --- Salary bump on task completion ---
-    salary_bump_pct: float = 0.01    # 1% raise per assigned employee per completed task
-    salary_max_cents: int = 10_000_000  # cap individual salary at $100K/month
-    skill_rate_max: float = 30.0  # cap employee skill rate (prevents exponential skill compounding)
+    salary_bump_pct: float
+    salary_max_cents: int
+    skill_rate_max: float

    # --- Prestige mechanics ---
-    prestige_max: float = 10.0
-    prestige_min: float = 1.0
-    penalty_fail_multiplier: float = 0.8
-    penalty_cancel_multiplier: float = 1.2
-    # Extra reward fraction per prestige level above 1.
-    # At 0.55: prestige-8 tasks pay ~4.85x more than prestige-1.
-    reward_prestige_scale: float = 0.3
-
-    # Daily prestige decay per domain. Domains not exercised lose prestige
-    # over time: -0.01/day → -0.3/month → untouched domain drops ~1 level
-    # every ~3 months. Floored at prestige_min.
-    prestige_decay_per_day: float = 0.005
+    prestige_max: float
+    prestige_min: float
+    penalty_fail_multiplier: float
+    penalty_cancel_multiplier: float
+    reward_prestige_scale: float
+    prestige_decay_per_day: float

    # --- Client trust (intuitive knobs) ---
-    num_clients: int = 8
-    trust_max: float = 5.0
-    # ~how many successful tasks to reach 80% of max trust with one client
-    trust_build_rate: float = 20.0
-    # 0-1: how punishing failures/inactivity are (0=forgiving, 1=harsh)
-    trust_fragility: float = 0.5
-    # 0-1: how much working for one client hurts trust with others (0=none, 1=heavy)
-    trust_focus_pressure: float = 0.5
-    # payout multiplier a typical Premium client (mult≈1.3) gives at max trust
-    trust_reward_ceiling: float = 2.6
-    # max work reduction at max trust (0.4 = 40% less work)
-    trust_work_reduction_max: float = 0.40
-    # fraction of tasks that require trust (~0.2 = 20%)
-    trust_gating_fraction: float = 0.20
+    num_clients: int
+    trust_max: float
+    trust_build_rate: float
+    trust_fragility: float
+    trust_focus_pressure: float
+    trust_reward_ceiling: float
+    trust_work_reduction_max: float
+    trust_gating_fraction: float

    # --- Derived trust params (computed from knobs above, do not set directly) ---
    trust_min: float = 0.0
@ -175,46 +169,25 @@ class WorldConfig(BaseModel):
    client_tier_enterprise_threshold: float = 1.7
    task_specialty_domain_bias: float = 0.7

-    # Required qty scaling by prestige: qty *= 1 + prestige_qty_scale * (prestige - 1).
-    # At 0.3: prestige-5 tasks need 2.2× the work of prestige-1 tasks.
-    prestige_qty_scale: float = 0.3
+    # --- Task scaling ---
+    prestige_qty_scale: float
+    deadline_qty_per_day: float
+    deadline_min_biz_days: int

-    # --- Deadline computation ---
-    deadline_qty_per_day: float = 150.0  # max per-domain qty / this = deadline days
-    deadline_min_biz_days: int = 7
-
-    # --- Progress milestones (fraction thresholds that trigger checkpoint events) ---
-    task_progress_milestones: list[float] = Field(default_factory=lambda: [0.25, 0.5, 0.75])
+    # --- Progress milestones ---
+    task_progress_milestones: list[float]

    # --- Business hours ---
-    workday_start_hour: int = 9
-    workday_end_hour: int = 18
+    workday_start_hour: int
+    workday_end_hour: int

-    # --- Distributions (shape of random draws during world generation) ---
+    # --- Distributions ---
    dist: WorldDists = Field(default_factory=WorldDists)

    # --- Salary tiers ---
-    salary_junior: SalaryTierConfig = Field(
-        default_factory=lambda: SalaryTierConfig(
-            name="junior", share=0.50,
-            min_cents=200_000, max_cents=400_000,
-            rate_min=1.0, rate_max=4.0,
-        )
-    )
-    salary_mid: SalaryTierConfig = Field(
-        default_factory=lambda: SalaryTierConfig(
-            name="mid", share=0.35,
-            min_cents=600_000, max_cents=800_000,
-            rate_min=4.0, rate_max=7.0,
-        )
-    )
-    salary_senior: SalaryTierConfig = Field(
-        default_factory=lambda: SalaryTierConfig(
-            name="senior", share=0.15,
-            min_cents=1_000_000, max_cents=1_500_000,
-            rate_min=7.0, rate_max=10.0,
-        )
-    )
+    salary_junior: SalaryTierConfig
+    salary_mid: SalaryTierConfig
+    salary_senior: SalaryTierConfig

    @model_validator(mode="after")
    def _derive_trust_params(self) -> WorldConfig:
--- a/src/yc_bench/services/generate_clients.py
+++ b/src/yc_bench/services/generate_clients.py
@ -44,10 +44,8 @@ class GeneratedClient:
    specialty_domains: list[str] = field(default_factory=list)


-def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig | None = None) -> list[GeneratedClient]:
+def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig) -> list[GeneratedClient]:
    """Generate clients with seeded reward multipliers, tiers, and specialty domains."""
-    if cfg is None:
-        cfg = WorldConfig()
    if count <= 0:
        return []
    if count > len(_CLIENT_NAME_POOL):
--- a/src/yc_bench/services/generate_employees.py
+++ b/src/yc_bench/services/generate_employees.py
@ -48,9 +48,7 @@ def _sample_domain_rates(rng, min_rate, max_rate):
    return [round(rng.uniform(min_rate, max_rate), 4) for _ in range(_NUM_DOMAINS)]


-def generate_employees(*, run_seed, count, cfg=None):
-    if cfg is None:
-        cfg = WorldConfig()
+def generate_employees(*, run_seed, count, cfg):
    if count <= 0:
        return []

@ -83,7 +81,7 @@ def generate_employees(*, run_seed, count, cfg=None):
    return employees


-def build_employee_rows(*, run_seed, company_id, count, cfg=None):
+def build_employee_rows(*, run_seed, company_id, count, cfg):
    generated = generate_employees(run_seed=run_seed, count=count, cfg=cfg)
    employee_rows = []
    skill_rows = []
--- a/src/yc_bench/services/generate_tasks.py
+++ b/src/yc_bench/services/generate_tasks.py
@ -150,15 +150,13 @@ def _make_task(rng, cfg, prestige, serial, requirements, client_index=0):
    )


-def generate_tasks(*, run_seed, count, cfg=None, client_specialties=None):
+def generate_tasks(*, run_seed, count, cfg, client_specialties=None):
    """Generate market tasks.

    Args:
        client_specialties: list of specialty domain lists, one per client index.
            e.g. [["research", "training"], ["inference"]] for 2 clients.
    """
-    if cfg is None:
-        cfg = WorldConfig()
    if count <= 0:
        return []

@ -176,7 +174,7 @@ def generate_tasks(*, run_seed, count, cfg=None, client_specialties=None):
    return out


-def build_task_rows(*, run_seed, count, cfg=None):
+def build_task_rows(*, run_seed, count, cfg):
    generated = generate_tasks(run_seed=run_seed, count=count, cfg=cfg)
    task_rows = []
    requirement_rows = []
@ -208,10 +206,8 @@ def build_task_rows(*, run_seed, count, cfg=None):
    return task_rows, requirement_rows


-def generate_replacement_task(*, run_seed, replenish_counter, replaced_prestige, replaced_client_index=0, cfg=None, specialty_domains=None):
+def generate_replacement_task(*, run_seed, replenish_counter, replaced_prestige, replaced_client_index=0, cfg, specialty_domains=None):
    """Generate a replacement task with the same prestige and client as the accepted task."""
-    if cfg is None:
-        cfg = WorldConfig()
    streams = RngStreams(run_seed)
    rng = streams.stream(f"replenish_{replenish_counter}")
    requirements = _sample_requirements(rng, cfg, prestige=replaced_prestige, specialty_domains=specialty_domains)
--- a/src/yc_bench/services/seed_world.py
+++ b/src/yc_bench/services/seed_world.py
@ -26,8 +26,8 @@ class SeedWorldRequest:
    horizon_years: int
    employee_count: int
    market_task_count: int
+    cfg: WorldConfig
    start_date: datetime | None = None
-    cfg: WorldConfig = field(default_factory=WorldConfig)


@dataclass(frozen=True)