improved system design, more intuitive hparams, updated configs, greedy bot updates

2026-04-30 17:40:40 +00:00 · 2026-03-12 12:12:47 -07:00 · 2026-03-12 12:12:47 -07:00 · 70ae316f27
commit 70ae316f27
parent 3d20bee609
13 changed files with 460 additions and 425 deletions
--- a/src/yc_bench/agent/prompt.py
+++ b/src/yc_bench/agent/prompt.py
@ -59,7 +59,7 @@ Your goal is to maximize company prestige and funds over the simulation horizon
 - Task completion after deadline = failure (0.8x prestige penalty, no reward, trust penalty)
 - Task cancellation = 1.2x prestige penalty per domain + trust penalty (worse than failure)
 - Employee throughput = base_rate / number_of_active_tasks_assigned
- Time advances only when you run `yc-bench sim resume`
+- Time advances only when you run `yc-bench sim resume`. **Note**: `sim resume` is blocked if you have no active (dispatched) tasks — you must accept, assign, and dispatch at least one task before time can advance.
 - Prestige is clamped [1, 10]. Funds are in cents.

 ## Client Trust
@ -149,13 +149,13 @@ def build_turn_context(
    if active_tasks == 0 and planned_tasks == 0:
        parts.append(
            "\n**ACTION REQUIRED**: No tasks are running. "
-            "Accept a task, assign employees to it, dispatch it, then call `yc-bench sim resume`. "
-            "Do this now — every turn without active tasks burns runway."
+            "`sim resume` is BLOCKED until you have active tasks. "
+            "Accept a task, assign employees to it, and dispatch it now."
        )
    elif planned_tasks > 0 and active_tasks == 0:
        parts.append(
            "\n**ACTION REQUIRED**: You have planned tasks but none are dispatched. "
-            "Assign employees and dispatch now, then call `yc-bench sim resume`."
+            "`sim resume` is BLOCKED until you dispatch. Assign employees and dispatch now."
        )
    else:
        parts.append("\nDecide your next actions. Use `run_command` to execute CLI commands.")
--- a/src/yc_bench/config/presets/default.toml
+++ b/src/yc_bench/config/presets/default.toml
@ -77,6 +77,22 @@ penalty_cancel_multiplier  = 2.0    # hardened: was 1.2
 # At 0.55: a prestige-8 task pays ~4.85x more than a prestige-1 task.
 reward_prestige_scale = 0.55    # hardened: was 0.3

+# --- Client trust ---
+# trust_build_rate:       ~tasks to reach 80% max trust (higher = slower)
+# trust_fragility:        0-1, how punishing failures/inactivity are
+# trust_focus_pressure:   0-1, penalty for spreading work across clients
+# trust_reward_ceiling:   payout multiplier a Premium client gives at max trust
+# trust_work_reduction_max: max work reduction at max trust
+# trust_gating_fraction:  fraction of tasks that require established trust
+num_clients              = 8
+trust_max                = 5.0
+trust_build_rate         = 20.0
+trust_fragility          = 0.5
+trust_focus_pressure     = 0.5
+trust_reward_ceiling     = 2.6
+trust_work_reduction_max = 0.40
+trust_gating_fraction    = 0.20
+
 # Daily prestige decay per domain. Domains not exercised lose prestige
 # over time: -0.005/day → -0.15/month. Untouched domain drops ~1 level
 # every ~6 months. Prevents single-domain hyper-specialization.
--- a/src/yc_bench/config/presets/easy.toml
+++ b/src/yc_bench/config/presets/easy.toml
@ -44,6 +44,14 @@ salary_bump_pct = 0.005
 # Low reward scaling — prestige climbing not yet necessary.
 reward_prestige_scale = 0.3

+# --- Client trust (forgiving: builds fairly fast, mild penalties) ---
+trust_build_rate         = 15.0
+trust_fragility          = 0.3
+trust_focus_pressure     = 0.3
+trust_reward_ceiling     = 2.8
+trust_work_reduction_max = 0.40
+trust_gating_fraction    = 0.15
+
 [world.dist.required_prestige]
 type = "triangular"
 low  = 1
--- a/src/yc_bench/config/presets/hard.toml
+++ b/src/yc_bench/config/presets/hard.toml
@ -58,6 +58,14 @@ salary_bump_pct = 0.01
 # High-prestige tasks pay substantially more.
 reward_prestige_scale = 0.55

+# --- Client trust (harsh: slow build, heavy penalties, lower ceiling) ---
+trust_build_rate         = 25.0
+trust_fragility          = 0.7
+trust_focus_pressure     = 0.7
+trust_reward_ceiling     = 2.4
+trust_work_reduction_max = 0.35
+trust_gating_fraction    = 0.25
+
 [world.dist.required_prestige]
 type = "triangular"
 low  = 1
--- a/src/yc_bench/config/presets/medium.toml
+++ b/src/yc_bench/config/presets/medium.toml
@ -51,6 +51,14 @@ salary_bump_pct = 0.01
 # Prestige scaling starting to reward climbing.
 reward_prestige_scale = 0.45

+# --- Client trust (balanced: default build speed, moderate penalties) ---
+trust_build_rate         = 20.0
+trust_fragility          = 0.5
+trust_focus_pressure     = 0.5
+trust_reward_ceiling     = 2.6
+trust_work_reduction_max = 0.40
+trust_gating_fraction    = 0.20
+
 [world.dist.required_prestige]
 type = "triangular"
 low  = 1
--- a/src/yc_bench/config/presets/nightmare.toml
+++ b/src/yc_bench/config/presets/nightmare.toml
@ -67,6 +67,14 @@ salary_bump_pct = 0.02
 # This is what makes the prestige climb existentially necessary.
 reward_prestige_scale = 0.7

+# --- Client trust (brutal: very slow build, severe penalties, tight ceiling) ---
+trust_build_rate         = 30.0
+trust_fragility          = 0.9
+trust_focus_pressure     = 0.8
+trust_reward_ceiling     = 2.2
+trust_work_reduction_max = 0.30
+trust_gating_fraction    = 0.30
+
 [world.dist.required_prestige]
 type = "triangular"
 low  = 1
--- a/src/yc_bench/config/presets/tutorial.toml
+++ b/src/yc_bench/config/presets/tutorial.toml
@ -44,6 +44,14 @@ salary_bump_pct = 0.0
 # Mild reward scaling — no need to climb prestige.
 reward_prestige_scale = 0.2

+# --- Client trust (very forgiving: builds fast, low penalties, generous payoff) ---
+trust_build_rate         = 10.0
+trust_fragility          = 0.2
+trust_focus_pressure     = 0.2
+trust_reward_ceiling     = 3.0
+trust_work_reduction_max = 0.40
+trust_gating_fraction    = 0.10
+
 [world.dist.required_prestige]
 type = "constant"
 value = 1        # ALL tasks are prestige-1 — no gating at all.
--- a/src/yc_bench/config/schema.py
+++ b/src/yc_bench/config/schema.py
@ -136,19 +136,44 @@ class WorldConfig(BaseModel):
    # every ~3 months. Floored at prestige_min.
    prestige_decay_per_day: float = 0.005

-    # --- Client trust ---
+    # --- Client trust (intuitive knobs) ---
    num_clients: int = 8
    trust_max: float = 5.0
+    # ~how many successful tasks to reach 80% of max trust with one client
+    trust_build_rate: float = 20.0
+    # 0-1: how punishing failures/inactivity are (0=forgiving, 1=harsh)
+    trust_fragility: float = 0.5
+    # 0-1: how much working for one client hurts trust with others (0=none, 1=heavy)
+    trust_focus_pressure: float = 0.5
+    # payout multiplier a typical Premium client (mult≈1.3) gives at max trust
+    trust_reward_ceiling: float = 2.6
+    # max work reduction at max trust (0.4 = 40% less work)
+    trust_work_reduction_max: float = 0.40
+    # fraction of tasks that require trust (~0.2 = 20%)
+    trust_gating_fraction: float = 0.20
+
+    # --- Detailed trust params (fields assigned in _derive_trust_params are overwritten from the knobs above — do not set those directly; the rest are ordinary settings) ---
    trust_min: float = 0.0
-    trust_gain_base: float = 0.40
+    trust_gain_base: float = 0.0
    trust_gain_diminishing_power: float = 1.5
-    trust_fail_penalty: float = 0.3
-    trust_cancel_penalty: float = 0.5
-    trust_decay_per_day: float = 0.015
-    trust_cross_client_decay: float = 0.03  # completing work for Client A erodes trust with other clients
-    trust_base_multiplier: float = 0.50   # all clients start at 50% of listed reward
-    trust_reward_scale: float = 0.25      # reward = listed × (base + client_mult² × scale × trust²/trust_max)
-    trust_work_reduction_max: float = 0.40  # trusted clients give clearer specs → up to 40% less work at max trust
+    trust_fail_penalty: float = 0.0
+    trust_cancel_penalty: float = 0.0
+    trust_decay_per_day: float = 0.0
+    trust_cross_client_decay: float = 0.0
+    trust_base_multiplier: float = 0.50
+    trust_reward_scale: float = 0.0
+    trust_reward_threshold: float = 0.0
+    trust_reward_ramp: float = 0.0
+    trust_level_reward_scale: float = 3.0
+    trust_level_max_required: int = 4
+    trust_gated_reward_boost: float = 0.15
+    client_reward_mult_low: float = 0.7
+    client_reward_mult_high: float = 2.5
+    client_reward_mult_mode: float = 1.0
+    client_single_specialty_prob: float = 0.6
+    client_tier_premium_threshold: float = 1.0
+    client_tier_enterprise_threshold: float = 1.7
+    task_specialty_domain_bias: float = 0.7

    # Required qty scaling by prestige: qty *= 1 + prestige_qty_scale * (prestige - 1).
    # At 0.3: prestige-5 tasks need 2.2× the work of prestige-1 tasks.
@ -191,6 +216,45 @@ class WorldConfig(BaseModel):
        )
    )

+    @model_validator(mode="after")
+    def _derive_trust_params(self) -> WorldConfig:
+        """Derive detailed trust parameters from the intuitive knobs.
+
+        Derivation preserves default behavior: trust_build_rate=20, fragility=0.5,
+        focus_pressure=0.5, reward_ceiling=2.6 reproduce the original hardcoded
+        defaults (trust_reward_scale only approximately: ≈0.2485 vs. the old 0.25).
+        """
+        # trust_build_rate → gain_base
+        # Approximate: gain_base ≈ trust_max × 1.6 / build_rate
+        # At default (20): 5.0 × 1.6 / 20 = 0.40
+        self.trust_gain_base = self.trust_max * 1.6 / self.trust_build_rate
+
+        # trust_fragility → fail_penalty, cancel_penalty, decay_per_day
+        # At 0.5: fail=0.3, cancel=0.5, decay=0.015
+        self.trust_fail_penalty = self.trust_fragility * 0.6
+        self.trust_cancel_penalty = self.trust_fragility * 1.0
+        self.trust_decay_per_day = self.trust_fragility * 0.03
+
+        # trust_focus_pressure → cross_client_decay
+        # At 0.5: cross_client_decay = 0.03
+        self.trust_cross_client_decay = self.trust_focus_pressure * 0.06
+
+        # trust_reward_ceiling → reward_scale
+        # ceiling = base_multiplier + ref_mult² × scale × trust_max
+        # Using Premium reference (mult≈1.3): scale = (ceiling - base_multiplier) / (1.69 × trust_max)
+        ref_mult_sq = 1.69  # 1.3²
+        self.trust_reward_scale = (
+            (self.trust_reward_ceiling - self.trust_base_multiplier)
+            / (ref_mult_sq * self.trust_max)
+        )
+
+        # trust_gating_fraction → threshold + ramp. NOTE: fraction=0 yields ramp=0, and task generation divides by ramp.
+        # At 0.2: threshold=0.6, ramp=0.4 (top 40% CAN require, effective ~20%)
+        self.trust_reward_threshold = max(0.0, 1.0 - 2.0 * self.trust_gating_fraction)
+        self.trust_reward_ramp = min(1.0, 2.0 * self.trust_gating_fraction)
+
+        return self
+
    @model_validator(mode="after")
    def _salary_shares_sum_to_one(self) -> WorldConfig:
        total = self.salary_junior.share + self.salary_mid.share + self.salary_senior.share
--- a/src/yc_bench/services/generate_clients.py
+++ b/src/yc_bench/services/generate_clients.py
@ -2,6 +2,7 @@ from __future__ import annotations

 from dataclasses import dataclass, field

+from ..config.schema import WorldConfig
 from ..db.models.company import Domain
 from .rng import RngStreams

@ -26,16 +27,11 @@ _CLIENT_NAME_POOL = [
 _ALL_DOMAINS = list(Domain)


-def _tier_from_multiplier(mult: float) -> str:
-    """Map reward multiplier to a visible tier label.
-
-    Standard: [0.7, 1.0)
-    Premium:  [1.0, 1.7)
-    Enterprise: [1.7, 2.5]
-    """
-    if mult < 1.0:
+def _tier_from_multiplier(mult: float, cfg: WorldConfig) -> str:
+    """Map reward multiplier to a visible tier label."""
+    if mult < cfg.client_tier_premium_threshold:
        return "Standard"
-    if mult < 1.7:
+    if mult < cfg.client_tier_enterprise_threshold:
        return "Premium"
    return "Enterprise"

@ -48,12 +44,10 @@ class GeneratedClient:
    specialty_domains: list[str] = field(default_factory=list)


-def generate_clients(*, run_seed: int, count: int) -> list[GeneratedClient]:
-    """Generate clients with seeded reward multipliers, tiers, and specialty domains.
-
-    Multipliers range from 0.7 to 2.5 (triangular, mode 1.0).
-    Each client gets 1-2 specialty domains (60% get 1, 40% get 2).
-    """
+def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig | None = None) -> list[GeneratedClient]:
+    """Generate clients with seeded reward multipliers, tiers, and specialty domains."""
+    if cfg is None:
+        cfg = WorldConfig()
    if count <= 0:
        return []
    if count > len(_CLIENT_NAME_POOL):
@ -64,10 +58,10 @@ def generate_clients(*, run_seed: int, count: int) -> list[GeneratedClient]:
    names = rng.sample(_CLIENT_NAME_POOL, count)
    clients = []
    for name in names:
-        mult = round(rng.triangular(0.7, 2.5, 1.0), 2)
-        tier = _tier_from_multiplier(mult)
-        # 60% chance of 1 specialty, 40% chance of 2
-        n_specialties = 1 if rng.random() < 0.6 else 2
+        mult = round(rng.triangular(cfg.client_reward_mult_low, cfg.client_reward_mult_high,
+                                     cfg.client_reward_mult_mode), 2)
+        tier = _tier_from_multiplier(mult, cfg)
+        n_specialties = 1 if rng.random() < cfg.client_single_specialty_prob else 2
        specialties = [d.value for d in rng.sample(_ALL_DOMAINS, n_specialties)]
        clients.append(GeneratedClient(
            name=name,
--- a/src/yc_bench/services/generate_tasks.py
+++ b/src/yc_bench/services/generate_tasks.py
@ -64,10 +64,10 @@ def _sample_required_qty(rng, cfg):
    return int(sample_from_spec(rng, cfg.dist.required_qty))


-def _sample_domains_with_bias(rng, k, specialty_domains=None):
+def _sample_domains_with_bias(rng, k, specialty_domains=None, specialty_bias=0.7):
    """Sample k domains, biased toward client specialties.

-    First domain pick: 70% chance of being a specialty (if specialties exist).
+    First domain pick: specialty_bias chance of being a specialty (if specialties exist).
    Remaining picks: uniform random from remaining domains.
    """
    if not specialty_domains or k <= 0:
@ -76,9 +76,9 @@ def _sample_domains_with_bias(rng, k, specialty_domains=None):
    picked = []
    available = list(_ALL_DOMAINS)

-    # First pick: 70% specialty bias
+    # First pick: specialty bias
    specialty_enums = [d for d in _ALL_DOMAINS if d.value in specialty_domains]
-    if specialty_enums and rng.random() < 0.7:
+    if specialty_enums and rng.random() < specialty_bias:
        first = rng.choice(specialty_enums)
    else:
        first = rng.choice(available)
@ -95,7 +95,8 @@ def _sample_domains_with_bias(rng, k, specialty_domains=None):

 def _sample_requirements(rng, cfg, prestige=1, specialty_domains=None):
    k = _sample_domain_count(rng, cfg)
-    picked_domains = _sample_domains_with_bias(rng, k, specialty_domains=specialty_domains)
+    picked_domains = _sample_domains_with_bias(rng, k, specialty_domains=specialty_domains,
+                                                specialty_bias=cfg.task_specialty_domain_bias)
    scale = 1 + cfg.prestige_qty_scale * (prestige - 1)
    return {domain: int(_sample_required_qty(rng, cfg) * scale) for domain in picked_domains}

@ -116,22 +117,20 @@ def _required_trust_from_reward(rng, cfg, reward_cents):

    reward_frac = min(1.0, (reward_cents - reward_floor) / (reward_ceiling - reward_floor))

-    # Only premium tasks (top ~30%) require trust. Clients reserve their
-    # best projects for proven vendors; routine work is open to anyone.
-    trust_prob = max(0.0, (reward_frac - 0.6) / 0.4)  # 0 below 60th pct, ramps to 1.0
+    # Only premium tasks (top portion) require trust.
+    trust_prob = max(0.0, (reward_frac - cfg.trust_reward_threshold) / cfg.trust_reward_ramp)
    if rng.random() >= trust_prob:
        return 0

-    # Trust level required: 1 at threshold, up to 4 for top tasks
-    return max(1, min(int(1 + reward_frac * 3), 4))
+    # Trust level required: 1 at threshold, up to max for top tasks
+    return max(1, min(int(1 + reward_frac * cfg.trust_level_reward_scale), cfg.trust_level_max_required))


 def _make_task(rng, cfg, prestige, serial, requirements, client_index=0):
    reward = _sample_reward_funds_cents(rng, cfg, prestige=prestige)
    required_trust = _required_trust_from_reward(rng, cfg, reward)
-    # Trust-gated tasks get a reward boost (premium projects pay more)
    if required_trust > 0:
-        reward = int(reward * (1.0 + 0.15 * required_trust))
+        reward = int(reward * (1.0 + cfg.trust_gated_reward_boost * required_trust))
    return GeneratedTask(
        title=f"Task-{serial}",
        required_prestige=prestige,
--- a/src/yc_bench/services/seed_world.py
+++ b/src/yc_bench/services/seed_world.py
@ -83,7 +83,7 @@ def _seed_employees(db, company, req):

 def _seed_clients(db, company, req):
    """Create Client rows and ClientTrust rows (all starting at 0.0)."""
-    generated = generate_clients(run_seed=req.run_seed, count=req.cfg.num_clients)
+    generated = generate_clients(run_seed=req.run_seed, count=req.cfg.num_clients, cfg=req.cfg)
    clients = []
    for gc in generated:
        client = Client(id=uuid4(), name=gc.name, reward_multiplier=gc.reward_multiplier,