improved system design, more intuitive hparams, updated configs, greedy bot updates

This commit is contained in:
alckasoc 2026-03-12 12:12:47 -07:00
parent 3d20bee609
commit 70ae316f27
13 changed files with 460 additions and 425 deletions

View file

@ -59,7 +59,7 @@ Your goal is to maximize company prestige and funds over the simulation horizon
- Task completion after deadline = failure (0.8x prestige penalty, no reward, trust penalty)
- Task cancellation = 1.2x prestige penalty per domain + trust penalty (worse than failure)
- Employee throughput = base_rate / number_of_active_tasks_assigned
- Time advances only when you run `yc-bench sim resume`
- Time advances only when you run `yc-bench sim resume`. **Note**: `sim resume` is blocked if you have no active (dispatched) tasks — you must accept, assign, and dispatch at least one task before time can advance.
- Prestige is clamped [1, 10]. Funds are in cents.
## Client Trust
@ -149,13 +149,13 @@ def build_turn_context(
if active_tasks == 0 and planned_tasks == 0:
parts.append(
"\n**ACTION REQUIRED**: No tasks are running. "
"Accept a task, assign employees to it, dispatch it, then call `yc-bench sim resume`. "
"Do this now — every turn without active tasks burns runway."
"`sim resume` is BLOCKED until you have active tasks. "
"Accept a task, assign employees to it, and dispatch it now."
)
elif planned_tasks > 0 and active_tasks == 0:
parts.append(
"\n**ACTION REQUIRED**: You have planned tasks but none are dispatched. "
"Assign employees and dispatch now, then call `yc-bench sim resume`."
"`sim resume` is BLOCKED until you dispatch. Assign employees and dispatch now."
)
else:
parts.append("\nDecide your next actions. Use `run_command` to execute CLI commands.")

View file

@ -77,6 +77,22 @@ penalty_cancel_multiplier = 2.0 # hardened: was 1.2
# At 0.55: a prestige-8 task pays ~4.85x more than a prestige-1 task.
reward_prestige_scale = 0.55 # hardened: was 0.3
# --- Client trust ---
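# trust_build_rate: approx. number of successful tasks to reach 80% of max trust with one client (higher = slower)
# trust_fragility: 0-1, how punishing failures/inactivity are (0 = forgiving, 1 = harsh)
# trust_focus_pressure: 0-1, how much completing work for one client erodes trust with the others (penalizes spreading work across clients)
# trust_reward_ceiling: payout multiplier a typical Premium client (mult≈1.3) gives at max trust
# trust_work_reduction_max: max work reduction at max trust (0.40 = 40% less work)
# trust_gating_fraction: approx. fraction of tasks that require established trust (0.20 = 20%)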
num_clients = 8
trust_max = 5.0
trust_build_rate = 20.0
trust_fragility = 0.5
trust_focus_pressure = 0.5
trust_reward_ceiling = 2.6
trust_work_reduction_max = 0.40
trust_gating_fraction = 0.20
# Daily prestige decay per domain. Domains not exercised lose prestige
# over time: -0.005/day → -0.15/month. Untouched domain drops ~1 level
# every ~6 months. Prevents single-domain hyper-specialization.

View file

@ -44,6 +44,14 @@ salary_bump_pct = 0.005
# Low reward scaling — prestige climbing not yet necessary.
reward_prestige_scale = 0.3
# --- Client trust (forgiving: builds fairly fast, mild penalties) ---
trust_build_rate = 15.0
trust_fragility = 0.3
trust_focus_pressure = 0.3
trust_reward_ceiling = 2.8
trust_work_reduction_max = 0.40
trust_gating_fraction = 0.15
[world.dist.required_prestige]
type = "triangular"
low = 1

View file

@ -58,6 +58,14 @@ salary_bump_pct = 0.01
# High-prestige tasks pay substantially more.
reward_prestige_scale = 0.55
# --- Client trust (harsh: slow build, heavy penalties, lower ceiling) ---
trust_build_rate = 25.0
trust_fragility = 0.7
trust_focus_pressure = 0.7
trust_reward_ceiling = 2.4
trust_work_reduction_max = 0.35
trust_gating_fraction = 0.25
[world.dist.required_prestige]
type = "triangular"
low = 1

View file

@ -51,6 +51,14 @@ salary_bump_pct = 0.01
# Prestige scaling starting to reward climbing.
reward_prestige_scale = 0.45
# --- Client trust (balanced: default build speed, moderate penalties) ---
trust_build_rate = 20.0
trust_fragility = 0.5
trust_focus_pressure = 0.5
trust_reward_ceiling = 2.6
trust_work_reduction_max = 0.40
trust_gating_fraction = 0.20
[world.dist.required_prestige]
type = "triangular"
low = 1

View file

@ -67,6 +67,14 @@ salary_bump_pct = 0.02
# This is what makes the prestige climb existentially necessary.
reward_prestige_scale = 0.7
# --- Client trust (brutal: very slow build, severe penalties, tight ceiling) ---
trust_build_rate = 30.0
trust_fragility = 0.9
trust_focus_pressure = 0.8
trust_reward_ceiling = 2.2
trust_work_reduction_max = 0.30
trust_gating_fraction = 0.30
[world.dist.required_prestige]
type = "triangular"
low = 1

View file

@ -44,6 +44,14 @@ salary_bump_pct = 0.0
# Mild reward scaling — no need to climb prestige.
reward_prestige_scale = 0.2
# --- Client trust (very forgiving: builds fast, low penalties, generous payoff) ---
trust_build_rate = 10.0
trust_fragility = 0.2
trust_focus_pressure = 0.2
trust_reward_ceiling = 3.0
trust_work_reduction_max = 0.40
trust_gating_fraction = 0.10
[world.dist.required_prestige]
type = "constant"
value = 1 # ALL tasks are prestige-1 — no gating at all.

View file

@ -136,19 +136,44 @@ class WorldConfig(BaseModel):
# every ~6 months. Floored at prestige_min.
prestige_decay_per_day: float = 0.005
# --- Client trust ---
# --- Client trust (intuitive knobs) ---
num_clients: int = 8
trust_max: float = 5.0
# ~how many successful tasks to reach 80% of max trust with one client
trust_build_rate: float = 20.0
# 0-1: how punishing failures/inactivity are (0=forgiving, 1=harsh)
trust_fragility: float = 0.5
# 0-1: how much working for one client hurts trust with others (0=none, 1=heavy)
trust_focus_pressure: float = 0.5
# payout multiplier a typical Premium client (mult≈1.3) gives at max trust
trust_reward_ceiling: float = 2.6
# max work reduction at max trust (0.4 = 40% less work)
trust_work_reduction_max: float = 0.40
# fraction of tasks that require trust (~0.2 = 20%)
trust_gating_fraction: float = 0.20
# --- Derived trust params (computed from knobs above, do not set directly) ---
trust_min: float = 0.0
trust_gain_base: float = 0.40
trust_gain_base: float = 0.0
trust_gain_diminishing_power: float = 1.5
trust_fail_penalty: float = 0.3
trust_cancel_penalty: float = 0.5
trust_decay_per_day: float = 0.015
trust_cross_client_decay: float = 0.03 # completing work for Client A erodes trust with other clients
trust_base_multiplier: float = 0.50 # all clients start at 50% of listed reward
trust_reward_scale: float = 0.25 # reward = listed × (base + client_mult² × scale × trust²/trust_max)
trust_work_reduction_max: float = 0.40 # trusted clients give clearer specs → up to 40% less work at max trust
trust_fail_penalty: float = 0.0
trust_cancel_penalty: float = 0.0
trust_decay_per_day: float = 0.0
trust_cross_client_decay: float = 0.0
trust_base_multiplier: float = 0.50
trust_reward_scale: float = 0.0
trust_reward_threshold: float = 0.0
trust_reward_ramp: float = 0.0
trust_level_reward_scale: float = 3.0
trust_level_max_required: int = 4
trust_gated_reward_boost: float = 0.15
client_reward_mult_low: float = 0.7
client_reward_mult_high: float = 2.5
client_reward_mult_mode: float = 1.0
client_single_specialty_prob: float = 0.6
client_tier_premium_threshold: float = 1.0
client_tier_enterprise_threshold: float = 1.7
task_specialty_domain_bias: float = 0.7
# Required qty scaling by prestige: qty *= 1 + prestige_qty_scale * (prestige - 1).
# At 0.3: prestige-5 tasks need 2.2× the work of prestige-1 tasks.
@ -191,6 +216,45 @@ class WorldConfig(BaseModel):
)
)
@model_validator(mode="after")
def _derive_trust_params(self) -> WorldConfig:
    """Derive the detailed trust parameters from the intuitive knobs.

    The derived fields are overwritten whenever this validator runs, so any
    value assigned to them directly is discarded.

    With the default knobs (trust_max=5.0, trust_build_rate=20,
    trust_fragility=0.5, trust_focus_pressure=0.5, trust_reward_ceiling=2.6,
    trust_gating_fraction=0.2) the results match the previous hardcoded
    defaults, except trust_reward_scale, which comes out at ~0.2485 instead
    of 0.25.

    Returns:
        This instance, with the derived trust fields filled in.

    Raises:
        ValueError: If trust_max or trust_build_rate is not positive, or if
            trust_gating_fraction is negative.
    """
    # Both values are used as divisors below; reject them up front so a bad
    # config surfaces as a validation error rather than a ZeroDivisionError.
    if self.trust_max <= 0:
        raise ValueError("trust_max must be > 0")
    if self.trust_build_rate <= 0:
        raise ValueError("trust_build_rate must be > 0")
    # A negative fraction would produce a negative ramp and invert the
    # trust-gating probability.
    if self.trust_gating_fraction < 0:
        raise ValueError("trust_gating_fraction must be >= 0")

    # trust_build_rate → gain_base
    # Approximate: gain_base ≈ trust_max × 1.6 / build_rate
    # At default (20): 5.0 × 1.6 / 20 = 0.40
    self.trust_gain_base = self.trust_max * 1.6 / self.trust_build_rate

    # trust_fragility → fail_penalty, cancel_penalty, decay_per_day
    # At 0.5: fail=0.3, cancel=0.5, decay=0.015
    self.trust_fail_penalty = self.trust_fragility * 0.6
    self.trust_cancel_penalty = self.trust_fragility * 1.0
    self.trust_decay_per_day = self.trust_fragility * 0.03

    # trust_focus_pressure → cross_client_decay
    # At 0.5: cross_client_decay = 0.03
    self.trust_cross_client_decay = self.trust_focus_pressure * 0.06

    # trust_reward_ceiling → reward_scale
    # ceiling = base_multiplier + ref_mult² × scale × trust_max
    # Using Premium reference (mult≈1.3):
    #   scale = (ceiling - base_multiplier) / (1.69 × trust_max)
    ref_mult_sq = 1.69  # 1.3²
    self.trust_reward_scale = (
        (self.trust_reward_ceiling - self.trust_base_multiplier)
        / (ref_mult_sq * self.trust_max)
    )

    # trust_gating_fraction → threshold + ramp
    # At 0.2: threshold=0.6, ramp=0.4 (top 40% CAN require, effective ~20%)
    self.trust_reward_threshold = max(0.0, 1.0 - 2.0 * self.trust_gating_fraction)
    # The ramp is used as a divisor when sampling required trust. Keep it
    # strictly positive so a gating fraction of 0 means "no task requires
    # trust" (threshold is 1.0, so the probability stays 0) instead of
    # dividing by zero.
    min_ramp = 1e-9
    self.trust_reward_ramp = max(min_ramp, min(1.0, 2.0 * self.trust_gating_fraction))
    return self
@model_validator(mode="after")
def _salary_shares_sum_to_one(self) -> WorldConfig:
total = self.salary_junior.share + self.salary_mid.share + self.salary_senior.share

View file

@ -2,6 +2,7 @@ from __future__ import annotations
from dataclasses import dataclass, field
from ..config.schema import WorldConfig
from ..db.models.company import Domain
from .rng import RngStreams
@ -26,16 +27,11 @@ _CLIENT_NAME_POOL = [
_ALL_DOMAINS = list(Domain)
def _tier_from_multiplier(mult: float) -> str:
"""Map reward multiplier to a visible tier label.
Standard: [0.7, 1.0)
Premium: [1.0, 1.7)
Enterprise: [1.7, 2.5]
"""
if mult < 1.0:
def _tier_from_multiplier(mult: float, cfg: WorldConfig) -> str:
"""Map reward multiplier to a visible tier label."""
if mult < cfg.client_tier_premium_threshold:
return "Standard"
if mult < 1.7:
if mult < cfg.client_tier_enterprise_threshold:
return "Premium"
return "Enterprise"
@ -48,12 +44,10 @@ class GeneratedClient:
specialty_domains: list[str] = field(default_factory=list)
def generate_clients(*, run_seed: int, count: int) -> list[GeneratedClient]:
"""Generate clients with seeded reward multipliers, tiers, and specialty domains.
Multipliers range from 0.7 to 2.5 (triangular, mode 1.0).
Each client gets 1-2 specialty domains (60% get 1, 40% get 2).
"""
def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig | None = None) -> list[GeneratedClient]:
"""Generate clients with seeded reward multipliers, tiers, and specialty domains."""
if cfg is None:
cfg = WorldConfig()
if count <= 0:
return []
if count > len(_CLIENT_NAME_POOL):
@ -64,10 +58,10 @@ def generate_clients(*, run_seed: int, count: int) -> list[GeneratedClient]:
names = rng.sample(_CLIENT_NAME_POOL, count)
clients = []
for name in names:
mult = round(rng.triangular(0.7, 2.5, 1.0), 2)
tier = _tier_from_multiplier(mult)
# 60% chance of 1 specialty, 40% chance of 2
n_specialties = 1 if rng.random() < 0.6 else 2
mult = round(rng.triangular(cfg.client_reward_mult_low, cfg.client_reward_mult_high,
cfg.client_reward_mult_mode), 2)
tier = _tier_from_multiplier(mult, cfg)
n_specialties = 1 if rng.random() < cfg.client_single_specialty_prob else 2
specialties = [d.value for d in rng.sample(_ALL_DOMAINS, n_specialties)]
clients.append(GeneratedClient(
name=name,

View file

@ -64,10 +64,10 @@ def _sample_required_qty(rng, cfg):
return int(sample_from_spec(rng, cfg.dist.required_qty))
def _sample_domains_with_bias(rng, k, specialty_domains=None):
def _sample_domains_with_bias(rng, k, specialty_domains=None, specialty_bias=0.7):
"""Sample k domains, biased toward client specialties.
First domain pick: 70% chance of being a specialty (if specialties exist).
First domain pick: specialty_bias chance of being a specialty (if specialties exist).
Remaining picks: uniform random from remaining domains.
"""
if not specialty_domains or k <= 0:
@ -76,9 +76,9 @@ def _sample_domains_with_bias(rng, k, specialty_domains=None):
picked = []
available = list(_ALL_DOMAINS)
# First pick: 70% specialty bias
# First pick: specialty bias
specialty_enums = [d for d in _ALL_DOMAINS if d.value in specialty_domains]
if specialty_enums and rng.random() < 0.7:
if specialty_enums and rng.random() < specialty_bias:
first = rng.choice(specialty_enums)
else:
first = rng.choice(available)
@ -95,7 +95,8 @@ def _sample_domains_with_bias(rng, k, specialty_domains=None):
def _sample_requirements(rng, cfg, prestige=1, specialty_domains=None):
k = _sample_domain_count(rng, cfg)
picked_domains = _sample_domains_with_bias(rng, k, specialty_domains=specialty_domains)
picked_domains = _sample_domains_with_bias(rng, k, specialty_domains=specialty_domains,
specialty_bias=cfg.task_specialty_domain_bias)
scale = 1 + cfg.prestige_qty_scale * (prestige - 1)
return {domain: int(_sample_required_qty(rng, cfg) * scale) for domain in picked_domains}
@ -116,22 +117,20 @@ def _required_trust_from_reward(rng, cfg, reward_cents):
reward_frac = min(1.0, (reward_cents - reward_floor) / (reward_ceiling - reward_floor))
# Only premium tasks (top ~30%) require trust. Clients reserve their
# best projects for proven vendors; routine work is open to anyone.
trust_prob = max(0.0, (reward_frac - 0.6) / 0.4) # 0 below 60th pct, ramps to 1.0
# Only premium tasks (top portion) require trust.
trust_prob = max(0.0, (reward_frac - cfg.trust_reward_threshold) / cfg.trust_reward_ramp)
if rng.random() >= trust_prob:
return 0
# Trust level required: 1 at threshold, up to 4 for top tasks
return max(1, min(int(1 + reward_frac * 3), 4))
# Trust level required: 1 at threshold, up to max for top tasks
return max(1, min(int(1 + reward_frac * cfg.trust_level_reward_scale), cfg.trust_level_max_required))
def _make_task(rng, cfg, prestige, serial, requirements, client_index=0):
reward = _sample_reward_funds_cents(rng, cfg, prestige=prestige)
required_trust = _required_trust_from_reward(rng, cfg, reward)
# Trust-gated tasks get a reward boost (premium projects pay more)
if required_trust > 0:
reward = int(reward * (1.0 + 0.15 * required_trust))
reward = int(reward * (1.0 + cfg.trust_gated_reward_boost * required_trust))
return GeneratedTask(
title=f"Task-{serial}",
required_prestige=prestige,

View file

@ -83,7 +83,7 @@ def _seed_employees(db, company, req):
def _seed_clients(db, company, req):
"""Create Client rows and ClientTrust rows (all starting at 0.0)."""
generated = generate_clients(run_seed=req.run_seed, count=req.cfg.num_clients)
generated = generate_clients(run_seed=req.run_seed, count=req.cfg.num_clients, cfg=req.cfg)
clients = []
for gc in generated:
client = Client(id=uuid4(), name=gc.name, reward_multiplier=gc.reward_multiplier,