mirror of
https://github.com/collinear-ai/yc-bench.git
synced 2026-04-30 17:40:40 +00:00
improved system design, more intuitive hparams, updated configs, greedy bot updates
This commit is contained in:
parent
3d20bee609
commit
70ae316f27
13 changed files with 460 additions and 425 deletions
|
|
@ -59,7 +59,7 @@ Your goal is to maximize company prestige and funds over the simulation horizon
|
|||
- Task completion after deadline = failure (0.8x prestige penalty, no reward, trust penalty)
|
||||
- Task cancellation = 1.2x prestige penalty per domain + trust penalty (worse than failure)
|
||||
- Employee throughput = base_rate / number_of_active_tasks_assigned
|
||||
- Time advances only when you run `yc-bench sim resume`
|
||||
- Time advances only when you run `yc-bench sim resume`. **Note**: `sim resume` is blocked if you have no active (dispatched) tasks — you must accept, assign, and dispatch at least one task before time can advance.
|
||||
- Prestige is clamped [1, 10]. Funds are in cents.
|
||||
|
||||
## Client Trust
|
||||
|
|
@ -149,13 +149,13 @@ def build_turn_context(
|
|||
if active_tasks == 0 and planned_tasks == 0:
|
||||
parts.append(
|
||||
"\n**ACTION REQUIRED**: No tasks are running. "
|
||||
"Accept a task, assign employees to it, dispatch it, then call `yc-bench sim resume`. "
|
||||
"Do this now — every turn without active tasks burns runway."
|
||||
"`sim resume` is BLOCKED until you have active tasks. "
|
||||
"Accept a task, assign employees to it, and dispatch it now."
|
||||
)
|
||||
elif planned_tasks > 0 and active_tasks == 0:
|
||||
parts.append(
|
||||
"\n**ACTION REQUIRED**: You have planned tasks but none are dispatched. "
|
||||
"Assign employees and dispatch now, then call `yc-bench sim resume`."
|
||||
"`sim resume` is BLOCKED until you dispatch. Assign employees and dispatch now."
|
||||
)
|
||||
else:
|
||||
parts.append("\nDecide your next actions. Use `run_command` to execute CLI commands.")
|
||||
|
|
|
|||
|
|
@ -77,6 +77,22 @@ penalty_cancel_multiplier = 2.0 # hardened: was 1.2
|
|||
# At 0.55: a prestige-8 task pays ~4.85x as much as a prestige-1 task (1 + 0.55 × 7).
|
||||
reward_prestige_scale = 0.55 # hardened: was 0.3
|
||||
|
||||
# --- Client trust ---
|
||||
# trust_build_rate: approx. number of successful tasks to reach 80% of max trust with one client (higher = slower)
|
||||
# trust_fragility: 0-1, how punishing failures/inactivity are
|
||||
# trust_focus_pressure: 0-1, penalty for spreading work across clients
|
||||
# trust_reward_ceiling: payout multiplier a Premium client gives at max trust
|
||||
# trust_work_reduction_max: max fraction of required work removed at max trust (0.40 = 40% less work)
|
||||
# trust_gating_fraction: fraction of tasks that require established trust
|
||||
num_clients = 8
|
||||
trust_max = 5.0
|
||||
trust_build_rate = 20.0
|
||||
trust_fragility = 0.5
|
||||
trust_focus_pressure = 0.5
|
||||
trust_reward_ceiling = 2.6
|
||||
trust_work_reduction_max = 0.40
|
||||
trust_gating_fraction = 0.20
|
||||
|
||||
# Daily prestige decay per domain. Domains not exercised lose prestige
|
||||
# over time: -0.005/day → -0.15/month. Untouched domain drops ~1 level
|
||||
# every ~6 months. Prevents single-domain hyper-specialization.
|
||||
|
|
|
|||
|
|
@ -44,6 +44,14 @@ salary_bump_pct = 0.005
|
|||
# Low reward scaling — prestige climbing not yet necessary.
|
||||
reward_prestige_scale = 0.3
|
||||
|
||||
# --- Client trust (forgiving: builds fairly fast, mild penalties) ---
|
||||
trust_build_rate = 15.0
|
||||
trust_fragility = 0.3
|
||||
trust_focus_pressure = 0.3
|
||||
trust_reward_ceiling = 2.8
|
||||
trust_work_reduction_max = 0.40
|
||||
trust_gating_fraction = 0.15
|
||||
|
||||
[world.dist.required_prestige]
|
||||
type = "triangular"
|
||||
low = 1
|
||||
|
|
|
|||
|
|
@ -58,6 +58,14 @@ salary_bump_pct = 0.01
|
|||
# High-prestige tasks pay substantially more.
|
||||
reward_prestige_scale = 0.55
|
||||
|
||||
# --- Client trust (harsh: slow build, heavy penalties, lower ceiling) ---
|
||||
trust_build_rate = 25.0
|
||||
trust_fragility = 0.7
|
||||
trust_focus_pressure = 0.7
|
||||
trust_reward_ceiling = 2.4
|
||||
trust_work_reduction_max = 0.35
|
||||
trust_gating_fraction = 0.25
|
||||
|
||||
[world.dist.required_prestige]
|
||||
type = "triangular"
|
||||
low = 1
|
||||
|
|
|
|||
|
|
@ -51,6 +51,14 @@ salary_bump_pct = 0.01
|
|||
# Prestige scaling starting to reward climbing.
|
||||
reward_prestige_scale = 0.45
|
||||
|
||||
# --- Client trust (balanced: default build speed, moderate penalties) ---
|
||||
trust_build_rate = 20.0
|
||||
trust_fragility = 0.5
|
||||
trust_focus_pressure = 0.5
|
||||
trust_reward_ceiling = 2.6
|
||||
trust_work_reduction_max = 0.40
|
||||
trust_gating_fraction = 0.20
|
||||
|
||||
[world.dist.required_prestige]
|
||||
type = "triangular"
|
||||
low = 1
|
||||
|
|
|
|||
|
|
@ -67,6 +67,14 @@ salary_bump_pct = 0.02
|
|||
# This is what makes the prestige climb existentially necessary.
|
||||
reward_prestige_scale = 0.7
|
||||
|
||||
# --- Client trust (brutal: very slow build, severe penalties, tight ceiling) ---
|
||||
trust_build_rate = 30.0
|
||||
trust_fragility = 0.9
|
||||
trust_focus_pressure = 0.8
|
||||
trust_reward_ceiling = 2.2
|
||||
trust_work_reduction_max = 0.30
|
||||
trust_gating_fraction = 0.30
|
||||
|
||||
[world.dist.required_prestige]
|
||||
type = "triangular"
|
||||
low = 1
|
||||
|
|
|
|||
|
|
@ -44,6 +44,14 @@ salary_bump_pct = 0.0
|
|||
# Mild reward scaling — no need to climb prestige.
|
||||
reward_prestige_scale = 0.2
|
||||
|
||||
# --- Client trust (very forgiving: builds fast, low penalties, generous payoff) ---
|
||||
trust_build_rate = 10.0
|
||||
trust_fragility = 0.2
|
||||
trust_focus_pressure = 0.2
|
||||
trust_reward_ceiling = 3.0
|
||||
trust_work_reduction_max = 0.40
|
||||
trust_gating_fraction = 0.10
|
||||
|
||||
[world.dist.required_prestige]
|
||||
type = "constant"
|
||||
value = 1 # ALL tasks are prestige-1 — no gating at all.
|
||||
|
|
|
|||
|
|
@ -136,19 +136,44 @@ class WorldConfig(BaseModel):
|
|||
# every ~3 months. Floored at prestige_min.
|
||||
prestige_decay_per_day: float = 0.005
|
||||
|
||||
# --- Client trust ---
|
||||
# --- Client trust (intuitive knobs) ---
|
||||
num_clients: int = 8
|
||||
trust_max: float = 5.0
|
||||
# ~how many successful tasks to reach 80% of max trust with one client
|
||||
trust_build_rate: float = 20.0
|
||||
# 0-1: how punishing failures/inactivity are (0=forgiving, 1=harsh)
|
||||
trust_fragility: float = 0.5
|
||||
# 0-1: how much working for one client hurts trust with others (0=none, 1=heavy)
|
||||
trust_focus_pressure: float = 0.5
|
||||
# payout multiplier a typical Premium client (mult≈1.3) gives at max trust
|
||||
trust_reward_ceiling: float = 2.6
|
||||
# max work reduction at max trust (0.4 = 40% less work)
|
||||
trust_work_reduction_max: float = 0.40
|
||||
# fraction of tasks that require trust (~0.2 = 20%)
|
||||
trust_gating_fraction: float = 0.20
|
||||
|
||||
# --- Derived trust params (fields assigned in _derive_trust_params are recomputed from the knobs above; do not set those directly) ---
|
||||
trust_min: float = 0.0
|
||||
trust_gain_base: float = 0.40
|
||||
trust_gain_base: float = 0.0
|
||||
trust_gain_diminishing_power: float = 1.5
|
||||
trust_fail_penalty: float = 0.3
|
||||
trust_cancel_penalty: float = 0.5
|
||||
trust_decay_per_day: float = 0.015
|
||||
trust_cross_client_decay: float = 0.03 # completing work for Client A erodes trust with other clients
|
||||
trust_base_multiplier: float = 0.50 # all clients start at 50% of listed reward
|
||||
trust_reward_scale: float = 0.25 # reward = listed × (base + client_mult² × scale × trust²/trust_max)
|
||||
trust_work_reduction_max: float = 0.40 # trusted clients give clearer specs → up to 40% less work at max trust
|
||||
trust_fail_penalty: float = 0.0
|
||||
trust_cancel_penalty: float = 0.0
|
||||
trust_decay_per_day: float = 0.0
|
||||
trust_cross_client_decay: float = 0.0
|
||||
trust_base_multiplier: float = 0.50
|
||||
trust_reward_scale: float = 0.0
|
||||
trust_reward_threshold: float = 0.0
|
||||
trust_reward_ramp: float = 0.0
|
||||
trust_level_reward_scale: float = 3.0
|
||||
trust_level_max_required: int = 4
|
||||
trust_gated_reward_boost: float = 0.15
|
||||
client_reward_mult_low: float = 0.7
|
||||
client_reward_mult_high: float = 2.5
|
||||
client_reward_mult_mode: float = 1.0
|
||||
client_single_specialty_prob: float = 0.6
|
||||
client_tier_premium_threshold: float = 1.0
|
||||
client_tier_enterprise_threshold: float = 1.7
|
||||
task_specialty_domain_bias: float = 0.7
|
||||
|
||||
# Required qty scaling by prestige: qty *= 1 + prestige_qty_scale * (prestige - 1).
|
||||
# At 0.3: prestige-5 tasks need 2.2× the work of prestige-1 tasks.
|
||||
|
|
@ -191,6 +216,45 @@ class WorldConfig(BaseModel):
|
|||
)
|
||||
)
|
||||
|
||||
@model_validator(mode="after")
def _derive_trust_params(self) -> WorldConfig:
    """Derive the detailed trust parameters from the intuitive knobs.

    Assigns ``trust_gain_base``, ``trust_fail_penalty``,
    ``trust_cancel_penalty``, ``trust_decay_per_day``,
    ``trust_cross_client_decay``, ``trust_reward_scale``,
    ``trust_reward_threshold`` and ``trust_reward_ramp`` from the knobs,
    replacing whatever value those fields held.

    At the default knobs (trust_max=5.0, trust_build_rate=20, fragility=0.5,
    focus_pressure=0.5, reward_ceiling=2.6, gating_fraction=0.2) this yields
    gain_base=0.40, fail=0.3, cancel=0.5, decay=0.015,
    cross_client_decay=0.03, reward_scale≈0.2485, threshold=0.6, ramp=0.4.

    Raises:
        ValueError: if ``trust_max`` or ``trust_build_rate`` is not
            positive (both are used as divisors below).
    """
    # Both values are divisors; fail with a clear message instead of a raw
    # ZeroDivisionError (or a silently negative gain for negative inputs).
    if self.trust_max <= 0:
        raise ValueError(f"trust_max must be > 0, got {self.trust_max}")
    if self.trust_build_rate <= 0:
        raise ValueError(
            f"trust_build_rate must be > 0, got {self.trust_build_rate}"
        )

    # trust_build_rate → gain_base
    # Approximate: gain_base ≈ trust_max × 1.6 / build_rate
    # At default (20): 5.0 × 1.6 / 20 = 0.40
    self.trust_gain_base = self.trust_max * 1.6 / self.trust_build_rate

    # trust_fragility → fail_penalty, cancel_penalty, decay_per_day
    # At 0.5: fail=0.3, cancel=0.5, decay=0.015
    fragility = self.trust_fragility
    self.trust_fail_penalty = fragility * 0.6
    self.trust_cancel_penalty = fragility * 1.0
    self.trust_decay_per_day = fragility * 0.03

    # trust_focus_pressure → cross_client_decay
    # At 0.5: cross_client_decay = 0.03
    self.trust_cross_client_decay = self.trust_focus_pressure * 0.06

    # trust_reward_ceiling → reward_scale
    # ceiling = base_multiplier + ref_mult² × scale × trust_max
    # Using Premium reference (mult≈1.3):
    #   scale = (ceiling - base_multiplier) / (1.69 × trust_max)
    ref_mult_sq = 1.69  # 1.3²
    self.trust_reward_scale = (
        (self.trust_reward_ceiling - self.trust_base_multiplier)
        / (ref_mult_sq * self.trust_max)
    )

    # trust_gating_fraction → threshold + ramp
    # At 0.2: threshold=0.6, ramp=0.4 (top 40% CAN require, effective ~20%)
    gating = self.trust_gating_fraction
    self.trust_reward_threshold = max(0.0, 1.0 - 2.0 * gating)
    # The ramp is used as a divisor when computing the gating probability,
    # so keep it strictly positive. With gating <= 0 the threshold is >= 1.0,
    # the numerator is never positive, and the tiny ramp therefore yields a
    # probability of 0 (no trust-gated tasks) instead of a ZeroDivisionError
    # or a sign-flipped probability.
    min_ramp = 1e-9
    self.trust_reward_ramp = max(min_ramp, min(1.0, 2.0 * gating))

    return self
|
||||
|
||||
@model_validator(mode="after")
|
||||
def _salary_shares_sum_to_one(self) -> WorldConfig:
|
||||
total = self.salary_junior.share + self.salary_mid.share + self.salary_senior.share
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ from __future__ import annotations
|
|||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..config.schema import WorldConfig
|
||||
from ..db.models.company import Domain
|
||||
from .rng import RngStreams
|
||||
|
||||
|
|
@ -26,16 +27,11 @@ _CLIENT_NAME_POOL = [
|
|||
_ALL_DOMAINS = list(Domain)
|
||||
|
||||
|
||||
def _tier_from_multiplier(mult: float) -> str:
|
||||
"""Map reward multiplier to a visible tier label.
|
||||
|
||||
Standard: [0.7, 1.0)
|
||||
Premium: [1.0, 1.7)
|
||||
Enterprise: [1.7, 2.5]
|
||||
"""
|
||||
if mult < 1.0:
|
||||
def _tier_from_multiplier(mult: float, cfg: WorldConfig) -> str:
|
||||
"""Map reward multiplier to a visible tier label."""
|
||||
if mult < cfg.client_tier_premium_threshold:
|
||||
return "Standard"
|
||||
if mult < 1.7:
|
||||
if mult < cfg.client_tier_enterprise_threshold:
|
||||
return "Premium"
|
||||
return "Enterprise"
|
||||
|
||||
|
|
@ -48,12 +44,10 @@ class GeneratedClient:
|
|||
specialty_domains: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def generate_clients(*, run_seed: int, count: int) -> list[GeneratedClient]:
|
||||
"""Generate clients with seeded reward multipliers, tiers, and specialty domains.
|
||||
|
||||
Multipliers range from 0.7 to 2.5 (triangular, mode 1.0).
|
||||
Each client gets 1-2 specialty domains (60% get 1, 40% get 2).
|
||||
"""
|
||||
def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig | None = None) -> list[GeneratedClient]:
|
||||
"""Generate clients with seeded reward multipliers, tiers, and specialty domains."""
|
||||
if cfg is None:
|
||||
cfg = WorldConfig()
|
||||
if count <= 0:
|
||||
return []
|
||||
if count > len(_CLIENT_NAME_POOL):
|
||||
|
|
@ -64,10 +58,10 @@ def generate_clients(*, run_seed: int, count: int) -> list[GeneratedClient]:
|
|||
names = rng.sample(_CLIENT_NAME_POOL, count)
|
||||
clients = []
|
||||
for name in names:
|
||||
mult = round(rng.triangular(0.7, 2.5, 1.0), 2)
|
||||
tier = _tier_from_multiplier(mult)
|
||||
# 60% chance of 1 specialty, 40% chance of 2
|
||||
n_specialties = 1 if rng.random() < 0.6 else 2
|
||||
mult = round(rng.triangular(cfg.client_reward_mult_low, cfg.client_reward_mult_high,
|
||||
cfg.client_reward_mult_mode), 2)
|
||||
tier = _tier_from_multiplier(mult, cfg)
|
||||
n_specialties = 1 if rng.random() < cfg.client_single_specialty_prob else 2
|
||||
specialties = [d.value for d in rng.sample(_ALL_DOMAINS, n_specialties)]
|
||||
clients.append(GeneratedClient(
|
||||
name=name,
|
||||
|
|
|
|||
|
|
@ -64,10 +64,10 @@ def _sample_required_qty(rng, cfg):
|
|||
return int(sample_from_spec(rng, cfg.dist.required_qty))
|
||||
|
||||
|
||||
def _sample_domains_with_bias(rng, k, specialty_domains=None):
|
||||
def _sample_domains_with_bias(rng, k, specialty_domains=None, specialty_bias=0.7):
|
||||
"""Sample k domains, biased toward client specialties.
|
||||
|
||||
First domain pick: 70% chance of being a specialty (if specialties exist).
|
||||
First domain pick: specialty_bias chance of being a specialty (if specialties exist).
|
||||
Remaining picks: uniform random from remaining domains.
|
||||
"""
|
||||
if not specialty_domains or k <= 0:
|
||||
|
|
@ -76,9 +76,9 @@ def _sample_domains_with_bias(rng, k, specialty_domains=None):
|
|||
picked = []
|
||||
available = list(_ALL_DOMAINS)
|
||||
|
||||
# First pick: 70% specialty bias
|
||||
# First pick: specialty bias
|
||||
specialty_enums = [d for d in _ALL_DOMAINS if d.value in specialty_domains]
|
||||
if specialty_enums and rng.random() < 0.7:
|
||||
if specialty_enums and rng.random() < specialty_bias:
|
||||
first = rng.choice(specialty_enums)
|
||||
else:
|
||||
first = rng.choice(available)
|
||||
|
|
@ -95,7 +95,8 @@ def _sample_domains_with_bias(rng, k, specialty_domains=None):
|
|||
|
||||
def _sample_requirements(rng, cfg, prestige=1, specialty_domains=None):
|
||||
k = _sample_domain_count(rng, cfg)
|
||||
picked_domains = _sample_domains_with_bias(rng, k, specialty_domains=specialty_domains)
|
||||
picked_domains = _sample_domains_with_bias(rng, k, specialty_domains=specialty_domains,
|
||||
specialty_bias=cfg.task_specialty_domain_bias)
|
||||
scale = 1 + cfg.prestige_qty_scale * (prestige - 1)
|
||||
return {domain: int(_sample_required_qty(rng, cfg) * scale) for domain in picked_domains}
|
||||
|
||||
|
|
@ -116,22 +117,20 @@ def _required_trust_from_reward(rng, cfg, reward_cents):
|
|||
|
||||
reward_frac = min(1.0, (reward_cents - reward_floor) / (reward_ceiling - reward_floor))
|
||||
|
||||
# Only premium tasks (top ~30%) require trust. Clients reserve their
|
||||
# best projects for proven vendors; routine work is open to anyone.
|
||||
trust_prob = max(0.0, (reward_frac - 0.6) / 0.4) # 0 below 60th pct, ramps to 1.0
|
||||
# Only premium tasks (top portion) require trust.
|
||||
trust_prob = max(0.0, (reward_frac - cfg.trust_reward_threshold) / cfg.trust_reward_ramp)
|
||||
if rng.random() >= trust_prob:
|
||||
return 0
|
||||
|
||||
# Trust level required: 1 at threshold, up to 4 for top tasks
|
||||
return max(1, min(int(1 + reward_frac * 3), 4))
|
||||
# Trust level required: 1 at threshold, up to max for top tasks
|
||||
return max(1, min(int(1 + reward_frac * cfg.trust_level_reward_scale), cfg.trust_level_max_required))
|
||||
|
||||
|
||||
def _make_task(rng, cfg, prestige, serial, requirements, client_index=0):
|
||||
reward = _sample_reward_funds_cents(rng, cfg, prestige=prestige)
|
||||
required_trust = _required_trust_from_reward(rng, cfg, reward)
|
||||
# Trust-gated tasks get a reward boost (premium projects pay more)
|
||||
if required_trust > 0:
|
||||
reward = int(reward * (1.0 + 0.15 * required_trust))
|
||||
reward = int(reward * (1.0 + cfg.trust_gated_reward_boost * required_trust))
|
||||
return GeneratedTask(
|
||||
title=f"Task-{serial}",
|
||||
required_prestige=prestige,
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ def _seed_employees(db, company, req):
|
|||
|
||||
def _seed_clients(db, company, req):
|
||||
"""Create Client rows and ClientTrust rows (all starting at 0.0)."""
|
||||
generated = generate_clients(run_seed=req.run_seed, count=req.cfg.num_clients)
|
||||
generated = generate_clients(run_seed=req.run_seed, count=req.cfg.num_clients, cfg=req.cfg)
|
||||
clients = []
|
||||
for gc in generated:
|
||||
client = Client(id=uuid4(), name=gc.name, reward_multiplier=gc.reward_multiplier,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue