diff --git a/scripts/bot_runner.py b/scripts/bot_runner.py index 01f608d..16d5c69 100644 --- a/scripts/bot_runner.py +++ b/scripts/bot_runner.py @@ -256,6 +256,7 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn) horizon_years=cfg.sim.horizon_years, employee_count=world_cfg.num_employees, market_task_count=world_cfg.num_market_tasks, + cfg=world_cfg, start_date=start_dt, ) result = seed_world_transactional(db, req) @@ -333,22 +334,14 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn) TaskRequirement.task_id == task.id ).all() - # Apply trust reward multiplier and work reduction + # Apply trust work reduction (no reward multiplier) if task.client_id is not None: - from yc_bench.db.models.client import Client, ClientTrust - client_row = db.query(Client).filter(Client.id == task.client_id).one_or_none() - client_multiplier = client_row.reward_multiplier if client_row else 1.0 + from yc_bench.db.models.client import ClientTrust ct = db.query(ClientTrust).filter( ClientTrust.company_id == company_id, ClientTrust.client_id == task.client_id, ).one_or_none() trust_level = float(ct.trust_level) if ct else 0.0 - trust_multiplier = ( - world_cfg.trust_base_multiplier - + (client_multiplier ** 2) * world_cfg.trust_reward_scale - * (trust_level ** 2) / world_cfg.trust_max - ) - task.reward_funds_cents = int(task.reward_funds_cents * trust_multiplier) work_reduction = world_cfg.trust_work_reduction_max * (trust_level / world_cfg.trust_max) for r in reqs: r.required_qty = int(float(r.required_qty) * (1 - work_reduction)) diff --git a/src/yc_bench/agent/prompt.py b/src/yc_bench/agent/prompt.py index b8da261..52fdb4d 100644 --- a/src/yc_bench/agent/prompt.py +++ b/src/yc_bench/agent/prompt.py @@ -40,19 +40,6 @@ Your goal is to maximize company prestige and funds over the simulation horizon - `yc-bench task cancel --task-id --reason "text"` — cancel a task (prestige penalty: 1.2x reward delta) - `yc-bench sim resume` 
— advance simulation to the next checkpoint event and return wake events -## Strategy Guidelines - -1. **Check company status first** to understand your financial position, runway, and **current prestige levels per domain**. -2. **Browse the market at your prestige level** — use `--required-prestige-lte N` where N matches your highest prestige. Higher-prestige tasks pay significantly more (prestige-5 tasks pay ~2.2x more than prestige-1). As your prestige grows, ALWAYS increase your browse filter to find better-paying tasks. -3. **Accept tasks** that match your capabilities and offer good reward-to-risk ratio. -4. **Assign employees strategically** — employees split throughput across active tasks. Focus employees on fewer tasks for faster completion. -5. **Dispatch tasks** once assigned, then continue monitoring progress/events via status and reports. -6. **Monitor deadlines** — completing after deadline causes failure (0.8x prestige penalty). Cancel hopeless tasks early (1.2x penalty, but stops bleeding time). -7. **Watch payroll** — monthly salaries are deducted automatically. Don't let runway drop to zero. -8. **Use status checks** to track critical milestones and risks. -9. **Successful tasks** award funds + prestige + employee skill boosts. Build momentum. -10. **Scale up over time** — regularly check `yc-bench company status` to see your prestige. Browse higher-prestige tasks as you grow — staying on prestige-1 tasks when you have prestige 5+ leaves enormous revenue on the table. - ## Key Rules - Task completion at or before deadline = success (reward funds + prestige + skill boost + client trust gain) @@ -65,31 +52,14 @@ Your goal is to maximize company prestige and funds over the simulation horizon ## Client Trust - Each task is offered by a specific **client** (e.g. "Nexus AI", "Vertex Labs"). -- Trust affects TWO things: **reward** and **work required**. +- Each client has **specialty domains** (e.g. "research", "training"). 
Tasks from a client are biased toward their specialties. +- Use `yc-bench client list` to see each client's specialties and current trust level. -### Client Tiers and Specialties -- Clients are classified into **tiers**: Standard, Premium, Enterprise. Higher tiers have higher reward potential at high trust. -- Each client has **specialty domains** (e.g. "research", "training"). Tasks from a client are biased toward their specialties — a client specializing in "research" will mostly offer research-heavy tasks. -- Use `yc-bench client list` to see each client's tier, specialties, and current trust level. - -### Reward Scaling -- Listed rewards are **potential** — actual payout depends on trust. Without trust, you only receive about **50%** of the listed reward. -- As trust grows, payouts increase significantly. Higher-tier clients scale better but start worse. -- Observe actual payouts over time to gauge each client's true value. - -### Work Reduction -- **Trusted clients give clearer specs** — work required shrinks with trust (up to 40% reduction at max trust). -- This compounds with higher rewards: at high trust, you earn more in less time. - -### Strategy -- **Domain alignment matters most**: Pick clients whose specialties match your company's prestige strengths. A Premium client aligned with your best domains may outperform an Enterprise client where you're weak. -- **Focus on 2-3 clients** to build trust fast. Scattered work = perpetual low payouts + full work load. -- **Compounding loop**: trust → less work → faster completion → more tasks per month → more trust → even better returns. -- **Higher-tier clients are riskier early**: they pay less than Standard clients at low trust, but become very rewarding at high trust. Standard clients are safer to start with. +### Mechanics - Completing tasks for a client builds **trust** [0.0–5.0]. Trust gains diminish as you approach max. 
-- **Premium tasks require trust**: High-reward tasks require established trust (required_trust 1-4). Clients don't give their best projects to unproven vendors. -- **Trust decays** daily — relationships need maintenance through continued work. -- **Failures hurt**: -0.3 trust. **Cancellations hurt more**: -0.5 trust. +- Trusted clients require less work (up to 35% reduction at max trust). +- Some tasks require minimum trust to accept (required_trust 1-4). +- Trust decays daily. Task failure and cancellation reduce trust. """ diff --git a/src/yc_bench/cli/sim_commands.py b/src/yc_bench/cli/sim_commands.py index e6bc997..53d613b 100644 --- a/src/yc_bench/cli/sim_commands.py +++ b/src/yc_bench/cli/sim_commands.py @@ -57,6 +57,7 @@ def sim_init( horizon_years=horizon_years, employee_count=employee_count, market_task_count=market_task_count, + cfg=_wc, start_date=start_dt, ) result = seed_world_transactional(db, req) diff --git a/src/yc_bench/cli/task_commands.py b/src/yc_bench/cli/task_commands.py index 022766a..d126a37 100644 --- a/src/yc_bench/cli/task_commands.py +++ b/src/yc_bench/cli/task_commands.py @@ -90,19 +90,10 @@ def task_accept( f"does not meet task requirement ({task.required_trust})." ) - # Apply trust reward multiplier and work reduction at accept time + # Apply trust work reduction at accept time (no reward multiplier — + # faster completion from trust already increases revenue via throughput). 
_cfg = _get_world_cfg() if task.client_id is not None: - client_row = db.query(Client).filter(Client.id == task.client_id).one_or_none() - client_multiplier = client_row.reward_multiplier if client_row else 1.0 - # Reward: continuous formula - trust_multiplier = ( - _cfg.trust_base_multiplier - + (client_multiplier ** 2) * _cfg.trust_reward_scale - * (trust_level ** 2) / _cfg.trust_max - ) - task.reward_funds_cents = int(task.reward_funds_cents * trust_multiplier) - # Work reduction: trusted clients give clearer specs → less work work_reduction = _cfg.trust_work_reduction_max * (trust_level / _cfg.trust_max) for r in reqs: r.required_qty = int(float(r.required_qty) * (1 - work_reduction)) diff --git a/src/yc_bench/config/__init__.py b/src/yc_bench/config/__init__.py index 0fe0d43..3f3bbaa 100644 --- a/src/yc_bench/config/__init__.py +++ b/src/yc_bench/config/__init__.py @@ -5,14 +5,8 @@ from .schema import ExperimentConfig, AgentConfig, LoopConfig, SimConfig, WorldC def get_world_config() -> WorldConfig: - """Load WorldConfig from the active experiment (YC_BENCH_EXPERIMENT env var, default: 'default'). - - Falls back to default WorldConfig if config loading fails (e.g. outside a benchmark run). - """ - try: - return load_config(os.environ.get("YC_BENCH_EXPERIMENT", "default")).world - except Exception: - return WorldConfig() + """Load WorldConfig from the active experiment (YC_BENCH_EXPERIMENT env var, default: 'default').""" + return load_config(os.environ.get("YC_BENCH_EXPERIMENT", "default")).world __all__ = [ diff --git a/src/yc_bench/config/presets/default.toml b/src/yc_bench/config/presets/default.toml index 52bd696..588fe23 100644 --- a/src/yc_bench/config/presets/default.toml +++ b/src/yc_bench/config/presets/default.toml @@ -63,6 +63,8 @@ market_browse_default_limit = 50 # Salary bump per completed task — each assigned employee gets this raise. # Compounds over time, accelerating payroll pressure. 
salary_bump_pct = 0.01 +salary_max_cents = 10_000_000 # cap individual salary at $100K/month +skill_rate_max = 10.0 # cap employee skill rate # --- Prestige mechanics --- prestige_max = 10.0 diff --git a/src/yc_bench/config/schema.py b/src/yc_bench/config/schema.py index 5c03f97..4ffdd34 100644 --- a/src/yc_bench/config/schema.py +++ b/src/yc_bench/config/schema.py @@ -107,50 +107,44 @@ class SimConfig(BaseModel): # --------------------------------------------------------------------------- class WorldConfig(BaseModel): + """All world-generation parameters. + + Most fields have no default and must be set explicitly in the TOML preset, + which prevents silent drift between schema.py and the preset files; a few (e.g. derived trust params, `dist`) still default. + """ + # --- Workforce --- - num_employees: int = 10 - initial_funds_cents: int = 15_000_000 # $150,000 - initial_prestige_level: float = 1.0 - work_hours_per_day: float = 9.0 + num_employees: int + initial_funds_cents: int + initial_prestige_level: float + work_hours_per_day: float # --- Market --- - num_market_tasks: int = 500 - market_browse_default_limit: int = 50 + num_market_tasks: int + market_browse_default_limit: int # --- Salary bump on task completion --- - salary_bump_pct: float = 0.01 # 1% raise per assigned employee per completed task - salary_max_cents: int = 10_000_000 # cap individual salary at $100K/month - skill_rate_max: float = 30.0 # cap employee skill rate (prevents exponential skill compounding) + salary_bump_pct: float + salary_max_cents: int + skill_rate_max: float # --- Prestige mechanics --- - prestige_max: float = 10.0 - prestige_min: float = 1.0 - penalty_fail_multiplier: float = 0.8 - penalty_cancel_multiplier: float = 1.2 - # Extra reward fraction per prestige level above 1. - # At 0.55: prestige-8 tasks pay ~4.85x more than prestige-1. - reward_prestige_scale: float = 0.3 - - # Daily prestige decay per domain. Domains not exercised lose prestige - # over time: -0.01/day → -0.3/month → untouched domain drops ~1 level - # every ~3 months. 
Floored at prestige_min. - prestige_decay_per_day: float = 0.005 + prestige_max: float + prestige_min: float + penalty_fail_multiplier: float + penalty_cancel_multiplier: float + reward_prestige_scale: float + prestige_decay_per_day: float # --- Client trust (intuitive knobs) --- - num_clients: int = 8 - trust_max: float = 5.0 - # ~how many successful tasks to reach 80% of max trust with one client - trust_build_rate: float = 20.0 - # 0-1: how punishing failures/inactivity are (0=forgiving, 1=harsh) - trust_fragility: float = 0.5 - # 0-1: how much working for one client hurts trust with others (0=none, 1=heavy) - trust_focus_pressure: float = 0.5 - # payout multiplier a typical Premium client (mult≈1.3) gives at max trust - trust_reward_ceiling: float = 2.6 - # max work reduction at max trust (0.4 = 40% less work) - trust_work_reduction_max: float = 0.40 - # fraction of tasks that require trust (~0.2 = 20%) - trust_gating_fraction: float = 0.20 + num_clients: int + trust_max: float + trust_build_rate: float + trust_fragility: float + trust_focus_pressure: float + trust_reward_ceiling: float + trust_work_reduction_max: float + trust_gating_fraction: float # --- Derived trust params (computed from knobs above, do not set directly) --- trust_min: float = 0.0 @@ -175,46 +169,25 @@ class WorldConfig(BaseModel): client_tier_enterprise_threshold: float = 1.7 task_specialty_domain_bias: float = 0.7 - # Required qty scaling by prestige: qty *= 1 + prestige_qty_scale * (prestige - 1). - # At 0.3: prestige-5 tasks need 2.2× the work of prestige-1 tasks. 
- prestige_qty_scale: float = 0.3 + # --- Task scaling --- + prestige_qty_scale: float + deadline_qty_per_day: float + deadline_min_biz_days: int - # --- Deadline computation --- - deadline_qty_per_day: float = 150.0 # max per-domain qty / this = deadline days - deadline_min_biz_days: int = 7 - - # --- Progress milestones (fraction thresholds that trigger checkpoint events) --- - task_progress_milestones: list[float] = Field(default_factory=lambda: [0.25, 0.5, 0.75]) + # --- Progress milestones --- + task_progress_milestones: list[float] # --- Business hours --- - workday_start_hour: int = 9 - workday_end_hour: int = 18 + workday_start_hour: int + workday_end_hour: int - # --- Distributions (shape of random draws during world generation) --- + # --- Distributions --- dist: WorldDists = Field(default_factory=WorldDists) # --- Salary tiers --- - salary_junior: SalaryTierConfig = Field( - default_factory=lambda: SalaryTierConfig( - name="junior", share=0.50, - min_cents=200_000, max_cents=400_000, - rate_min=1.0, rate_max=4.0, - ) - ) - salary_mid: SalaryTierConfig = Field( - default_factory=lambda: SalaryTierConfig( - name="mid", share=0.35, - min_cents=600_000, max_cents=800_000, - rate_min=4.0, rate_max=7.0, - ) - ) - salary_senior: SalaryTierConfig = Field( - default_factory=lambda: SalaryTierConfig( - name="senior", share=0.15, - min_cents=1_000_000, max_cents=1_500_000, - rate_min=7.0, rate_max=10.0, - ) - ) + salary_junior: SalaryTierConfig + salary_mid: SalaryTierConfig + salary_senior: SalaryTierConfig @model_validator(mode="after") def _derive_trust_params(self) -> WorldConfig: diff --git a/src/yc_bench/services/generate_clients.py b/src/yc_bench/services/generate_clients.py index a87cac8..8ebe391 100644 --- a/src/yc_bench/services/generate_clients.py +++ b/src/yc_bench/services/generate_clients.py @@ -44,10 +44,8 @@ class GeneratedClient: specialty_domains: list[str] = field(default_factory=list) -def generate_clients(*, run_seed: int, count: int, cfg: 
WorldConfig | None = None) -> list[GeneratedClient]: +def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig) -> list[GeneratedClient]: """Generate clients with seeded reward multipliers, tiers, and specialty domains.""" - if cfg is None: - cfg = WorldConfig() if count <= 0: return [] if count > len(_CLIENT_NAME_POOL): diff --git a/src/yc_bench/services/generate_employees.py b/src/yc_bench/services/generate_employees.py index 0c3f8a9..5504e7b 100644 --- a/src/yc_bench/services/generate_employees.py +++ b/src/yc_bench/services/generate_employees.py @@ -48,9 +48,7 @@ def _sample_domain_rates(rng, min_rate, max_rate): return [round(rng.uniform(min_rate, max_rate), 4) for _ in range(_NUM_DOMAINS)] -def generate_employees(*, run_seed, count, cfg=None): - if cfg is None: - cfg = WorldConfig() +def generate_employees(*, run_seed, count, cfg): if count <= 0: return [] @@ -83,7 +81,7 @@ def generate_employees(*, run_seed, count, cfg=None): return employees -def build_employee_rows(*, run_seed, company_id, count, cfg=None): +def build_employee_rows(*, run_seed, company_id, count, cfg): generated = generate_employees(run_seed=run_seed, count=count, cfg=cfg) employee_rows = [] skill_rows = [] diff --git a/src/yc_bench/services/generate_tasks.py b/src/yc_bench/services/generate_tasks.py index 458819c..c95d515 100644 --- a/src/yc_bench/services/generate_tasks.py +++ b/src/yc_bench/services/generate_tasks.py @@ -150,15 +150,13 @@ def _make_task(rng, cfg, prestige, serial, requirements, client_index=0): ) -def generate_tasks(*, run_seed, count, cfg=None, client_specialties=None): +def generate_tasks(*, run_seed, count, cfg, client_specialties=None): """Generate market tasks. Args: client_specialties: list of specialty domain lists, one per client index. e.g. [["research", "training"], ["inference"]] for 2 clients. 
""" - if cfg is None: - cfg = WorldConfig() if count <= 0: return [] @@ -176,7 +174,7 @@ def generate_tasks(*, run_seed, count, cfg=None, client_specialties=None): return out -def build_task_rows(*, run_seed, count, cfg=None): +def build_task_rows(*, run_seed, count, cfg): generated = generate_tasks(run_seed=run_seed, count=count, cfg=cfg) task_rows = [] requirement_rows = [] @@ -208,10 +206,8 @@ def build_task_rows(*, run_seed, count, cfg=None): return task_rows, requirement_rows -def generate_replacement_task(*, run_seed, replenish_counter, replaced_prestige, replaced_client_index=0, cfg=None, specialty_domains=None): +def generate_replacement_task(*, run_seed, replenish_counter, replaced_prestige, replaced_client_index=0, cfg, specialty_domains=None): """Generate a replacement task with the same prestige and client as the accepted task.""" - if cfg is None: - cfg = WorldConfig() streams = RngStreams(run_seed) rng = streams.stream(f"replenish_{replenish_counter}") requirements = _sample_requirements(rng, cfg, prestige=replaced_prestige, specialty_domains=specialty_domains) diff --git a/src/yc_bench/services/seed_world.py b/src/yc_bench/services/seed_world.py index a6b066b..d605b29 100644 --- a/src/yc_bench/services/seed_world.py +++ b/src/yc_bench/services/seed_world.py @@ -26,8 +26,8 @@ class SeedWorldRequest: horizon_years: int employee_count: int market_task_count: int + cfg: WorldConfig start_date: datetime | None = None - cfg: WorldConfig = field(default_factory=WorldConfig) @dataclass(frozen=True)