Capped skill rate at 10 + removed reward mult from clients

This commit is contained in:
Muyu He 2026-03-16 16:09:17 -07:00
parent d976b9cbb4
commit 140bb58653
11 changed files with 64 additions and 148 deletions

View file

@ -256,6 +256,7 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
horizon_years=cfg.sim.horizon_years,
employee_count=world_cfg.num_employees,
market_task_count=world_cfg.num_market_tasks,
cfg=world_cfg,
start_date=start_dt,
)
result = seed_world_transactional(db, req)
@ -333,22 +334,14 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
TaskRequirement.task_id == task.id
).all()
# Apply trust reward multiplier and work reduction
# Apply trust work reduction (no reward multiplier)
if task.client_id is not None:
from yc_bench.db.models.client import Client, ClientTrust
client_row = db.query(Client).filter(Client.id == task.client_id).one_or_none()
client_multiplier = client_row.reward_multiplier if client_row else 1.0
from yc_bench.db.models.client import ClientTrust
ct = db.query(ClientTrust).filter(
ClientTrust.company_id == company_id,
ClientTrust.client_id == task.client_id,
).one_or_none()
trust_level = float(ct.trust_level) if ct else 0.0
trust_multiplier = (
world_cfg.trust_base_multiplier
+ (client_multiplier ** 2) * world_cfg.trust_reward_scale
* (trust_level ** 2) / world_cfg.trust_max
)
task.reward_funds_cents = int(task.reward_funds_cents * trust_multiplier)
work_reduction = world_cfg.trust_work_reduction_max * (trust_level / world_cfg.trust_max)
for r in reqs:
r.required_qty = int(float(r.required_qty) * (1 - work_reduction))

View file

@ -40,19 +40,6 @@ Your goal is to maximize company prestige and funds over the simulation horizon
- `yc-bench task cancel --task-id <UUID> --reason "text"` — cancel a task (prestige penalty: 1.2x reward delta)
- `yc-bench sim resume` — advance simulation to the next checkpoint event and return wake events
## Strategy Guidelines
1. **Check company status first** to understand your financial position, runway, and **current prestige levels per domain**.
2. **Browse the market at your prestige level** — use `--required-prestige-lte N` where N matches your highest prestige. Higher-prestige tasks pay significantly more (prestige-5 tasks pay ~2.2x more than prestige-1). As your prestige grows, ALWAYS increase your browse filter to find better-paying tasks.
3. **Accept tasks** that match your capabilities and offer good reward-to-risk ratio.
4. **Assign employees strategically** — employees split throughput across active tasks. Focus employees on fewer tasks for faster completion.
5. **Dispatch tasks** once assigned, then continue monitoring progress/events via status and reports.
6. **Monitor deadlines** — completing after deadline causes failure (0.8x prestige penalty). Cancel hopeless tasks early (1.2x penalty, but stops bleeding time).
7. **Watch payroll** — monthly salaries are deducted automatically. Don't let runway drop to zero.
8. **Use status checks** to track critical milestones and risks.
9. **Successful tasks** award funds + prestige + employee skill boosts. Build momentum.
10. **Scale up over time** — regularly check `yc-bench company status` to see your prestige. Browse higher-prestige tasks as you grow — staying on prestige-1 tasks when you have prestige 5+ leaves enormous revenue on the table.
## Key Rules
- Task completion at or before deadline = success (reward funds + prestige + skill boost + client trust gain)
@ -65,31 +52,14 @@ Your goal is to maximize company prestige and funds over the simulation horizon
## Client Trust
- Each task is offered by a specific **client** (e.g. "Nexus AI", "Vertex Labs").
- Trust affects TWO things: **reward** and **work required**.
- Each client has **specialty domains** (e.g. "research", "training"). Tasks from a client are biased toward their specialties.
- Use `yc-bench client list` to see each client's specialties and current trust level.
### Client Tiers and Specialties
- Clients are classified into **tiers**: Standard, Premium, Enterprise. Higher tiers have higher reward potential at high trust.
- Each client has **specialty domains** (e.g. "research", "training"). Tasks from a client are biased toward their specialties — a client specializing in "research" will mostly offer research-heavy tasks.
- Use `yc-bench client list` to see each client's tier, specialties, and current trust level.
### Reward Scaling
- Listed rewards are **potential** — actual payout depends on trust. Without trust, you only receive about **50%** of the listed reward.
- As trust grows, payouts increase significantly. Higher-tier clients scale better but start worse.
- Observe actual payouts over time to gauge each client's true value.
### Work Reduction
- **Trusted clients give clearer specs** — work required shrinks with trust (up to 40% reduction at max trust).
- This compounds with higher rewards: at high trust, you earn more in less time.
### Strategy
- **Domain alignment matters most**: Pick clients whose specialties match your company's prestige strengths. A Premium client aligned with your best domains may outperform an Enterprise client where you're weak.
- **Focus on 2-3 clients** to build trust fast. Scattered work = perpetual low payouts + full work load.
- **Compounding loop**: trust → less work → faster completion → more tasks per month → more trust → even better returns.
- **Higher-tier clients are riskier early**: they pay less than Standard clients at low trust, but become very rewarding at high trust. Standard clients are safer to start with.
### Mechanics
- Completing tasks for a client builds **trust** [0.0–5.0]. Trust gains diminish as you approach max.
- **Premium tasks require trust**: High-reward tasks require established trust (required_trust 1-4). Clients don't give their best projects to unproven vendors.
- **Trust decays** daily relationships need maintenance through continued work.
- **Failures hurt**: -0.3 trust. **Cancellations hurt more**: -0.5 trust.
- Trusted clients require less work (up to 35% reduction at max trust).
- Some tasks require minimum trust to accept (required_trust 1-4).
- Trust decays daily. Task failure and cancellation reduce trust.
"""

View file

@ -57,6 +57,7 @@ def sim_init(
horizon_years=horizon_years,
employee_count=employee_count,
market_task_count=market_task_count,
cfg=_wc,
start_date=start_dt,
)
result = seed_world_transactional(db, req)

View file

@ -90,19 +90,10 @@ def task_accept(
f"does not meet task requirement ({task.required_trust})."
)
# Apply trust reward multiplier and work reduction at accept time
# Apply trust work reduction at accept time (no reward multiplier —
# faster completion from trust already increases revenue via throughput).
_cfg = _get_world_cfg()
if task.client_id is not None:
client_row = db.query(Client).filter(Client.id == task.client_id).one_or_none()
client_multiplier = client_row.reward_multiplier if client_row else 1.0
# Reward: continuous formula
trust_multiplier = (
_cfg.trust_base_multiplier
+ (client_multiplier ** 2) * _cfg.trust_reward_scale
* (trust_level ** 2) / _cfg.trust_max
)
task.reward_funds_cents = int(task.reward_funds_cents * trust_multiplier)
# Work reduction: trusted clients give clearer specs → less work
work_reduction = _cfg.trust_work_reduction_max * (trust_level / _cfg.trust_max)
for r in reqs:
r.required_qty = int(float(r.required_qty) * (1 - work_reduction))

View file

@ -5,14 +5,8 @@ from .schema import ExperimentConfig, AgentConfig, LoopConfig, SimConfig, WorldC
def get_world_config() -> WorldConfig:
"""Load WorldConfig from the active experiment (YC_BENCH_EXPERIMENT env var, default: 'default').
Falls back to default WorldConfig if config loading fails (e.g. outside a benchmark run).
"""
try:
return load_config(os.environ.get("YC_BENCH_EXPERIMENT", "default")).world
except Exception:
return WorldConfig()
"""Load WorldConfig from the active experiment (YC_BENCH_EXPERIMENT env var, default: 'default')."""
return load_config(os.environ.get("YC_BENCH_EXPERIMENT", "default")).world
__all__ = [

View file

@ -63,6 +63,8 @@ market_browse_default_limit = 50
# Salary bump per completed task — each assigned employee gets this raise.
# Compounds over time, accelerating payroll pressure.
salary_bump_pct = 0.01
salary_max_cents = 10_000_000 # cap individual salary at $100K/month
skill_rate_max = 10.0 # cap employee skill rate
# --- Prestige mechanics ---
prestige_max = 10.0

View file

@ -107,50 +107,44 @@ class SimConfig(BaseModel):
# ---------------------------------------------------------------------------
class WorldConfig(BaseModel):
"""All world-generation parameters.
No defaults — every field must be set explicitly in the TOML preset.
This prevents silent drift between schema.py and the preset files.
"""
# --- Workforce ---
num_employees: int = 10
initial_funds_cents: int = 15_000_000 # $150,000
initial_prestige_level: float = 1.0
work_hours_per_day: float = 9.0
num_employees: int
initial_funds_cents: int
initial_prestige_level: float
work_hours_per_day: float
# --- Market ---
num_market_tasks: int = 500
market_browse_default_limit: int = 50
num_market_tasks: int
market_browse_default_limit: int
# --- Salary bump on task completion ---
salary_bump_pct: float = 0.01 # 1% raise per assigned employee per completed task
salary_max_cents: int = 10_000_000 # cap individual salary at $100K/month
skill_rate_max: float = 30.0 # cap employee skill rate (prevents exponential skill compounding)
salary_bump_pct: float
salary_max_cents: int
skill_rate_max: float
# --- Prestige mechanics ---
prestige_max: float = 10.0
prestige_min: float = 1.0
penalty_fail_multiplier: float = 0.8
penalty_cancel_multiplier: float = 1.2
# Extra reward fraction per prestige level above 1.
# At 0.55: prestige-8 tasks pay ~4.85x more than prestige-1.
reward_prestige_scale: float = 0.3
# Daily prestige decay per domain. Domains not exercised lose prestige
# over time: -0.01/day → -0.3/month → untouched domain drops ~1 level
# every ~3 months. Floored at prestige_min.
prestige_decay_per_day: float = 0.005
prestige_max: float
prestige_min: float
penalty_fail_multiplier: float
penalty_cancel_multiplier: float
reward_prestige_scale: float
prestige_decay_per_day: float
# --- Client trust (intuitive knobs) ---
num_clients: int = 8
trust_max: float = 5.0
# ~how many successful tasks to reach 80% of max trust with one client
trust_build_rate: float = 20.0
# 0-1: how punishing failures/inactivity are (0=forgiving, 1=harsh)
trust_fragility: float = 0.5
# 0-1: how much working for one client hurts trust with others (0=none, 1=heavy)
trust_focus_pressure: float = 0.5
# payout multiplier a typical Premium client (mult≈1.3) gives at max trust
trust_reward_ceiling: float = 2.6
# max work reduction at max trust (0.4 = 40% less work)
trust_work_reduction_max: float = 0.40
# fraction of tasks that require trust (~0.2 = 20%)
trust_gating_fraction: float = 0.20
num_clients: int
trust_max: float
trust_build_rate: float
trust_fragility: float
trust_focus_pressure: float
trust_reward_ceiling: float
trust_work_reduction_max: float
trust_gating_fraction: float
# --- Derived trust params (computed from knobs above, do not set directly) ---
trust_min: float = 0.0
@ -175,46 +169,25 @@ class WorldConfig(BaseModel):
client_tier_enterprise_threshold: float = 1.7
task_specialty_domain_bias: float = 0.7
# Required qty scaling by prestige: qty *= 1 + prestige_qty_scale * (prestige - 1).
# At 0.3: prestige-5 tasks need 2.2× the work of prestige-1 tasks.
prestige_qty_scale: float = 0.3
# --- Task scaling ---
prestige_qty_scale: float
deadline_qty_per_day: float
deadline_min_biz_days: int
# --- Deadline computation ---
deadline_qty_per_day: float = 150.0 # max per-domain qty / this = deadline days
deadline_min_biz_days: int = 7
# --- Progress milestones (fraction thresholds that trigger checkpoint events) ---
task_progress_milestones: list[float] = Field(default_factory=lambda: [0.25, 0.5, 0.75])
# --- Progress milestones ---
task_progress_milestones: list[float]
# --- Business hours ---
workday_start_hour: int = 9
workday_end_hour: int = 18
workday_start_hour: int
workday_end_hour: int
# --- Distributions (shape of random draws during world generation) ---
# --- Distributions ---
dist: WorldDists = Field(default_factory=WorldDists)
# --- Salary tiers ---
salary_junior: SalaryTierConfig = Field(
default_factory=lambda: SalaryTierConfig(
name="junior", share=0.50,
min_cents=200_000, max_cents=400_000,
rate_min=1.0, rate_max=4.0,
)
)
salary_mid: SalaryTierConfig = Field(
default_factory=lambda: SalaryTierConfig(
name="mid", share=0.35,
min_cents=600_000, max_cents=800_000,
rate_min=4.0, rate_max=7.0,
)
)
salary_senior: SalaryTierConfig = Field(
default_factory=lambda: SalaryTierConfig(
name="senior", share=0.15,
min_cents=1_000_000, max_cents=1_500_000,
rate_min=7.0, rate_max=10.0,
)
)
salary_junior: SalaryTierConfig
salary_mid: SalaryTierConfig
salary_senior: SalaryTierConfig
@model_validator(mode="after")
def _derive_trust_params(self) -> WorldConfig:

View file

@ -44,10 +44,8 @@ class GeneratedClient:
specialty_domains: list[str] = field(default_factory=list)
def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig | None = None) -> list[GeneratedClient]:
def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig) -> list[GeneratedClient]:
"""Generate clients with seeded reward multipliers, tiers, and specialty domains."""
if cfg is None:
cfg = WorldConfig()
if count <= 0:
return []
if count > len(_CLIENT_NAME_POOL):

View file

@ -48,9 +48,7 @@ def _sample_domain_rates(rng, min_rate, max_rate):
return [round(rng.uniform(min_rate, max_rate), 4) for _ in range(_NUM_DOMAINS)]
def generate_employees(*, run_seed, count, cfg=None):
if cfg is None:
cfg = WorldConfig()
def generate_employees(*, run_seed, count, cfg):
if count <= 0:
return []
@ -83,7 +81,7 @@ def generate_employees(*, run_seed, count, cfg=None):
return employees
def build_employee_rows(*, run_seed, company_id, count, cfg=None):
def build_employee_rows(*, run_seed, company_id, count, cfg):
generated = generate_employees(run_seed=run_seed, count=count, cfg=cfg)
employee_rows = []
skill_rows = []

View file

@ -150,15 +150,13 @@ def _make_task(rng, cfg, prestige, serial, requirements, client_index=0):
)
def generate_tasks(*, run_seed, count, cfg=None, client_specialties=None):
def generate_tasks(*, run_seed, count, cfg, client_specialties=None):
"""Generate market tasks.
Args:
client_specialties: list of specialty domain lists, one per client index.
e.g. [["research", "training"], ["inference"]] for 2 clients.
"""
if cfg is None:
cfg = WorldConfig()
if count <= 0:
return []
@ -176,7 +174,7 @@ def generate_tasks(*, run_seed, count, cfg=None, client_specialties=None):
return out
def build_task_rows(*, run_seed, count, cfg=None):
def build_task_rows(*, run_seed, count, cfg):
generated = generate_tasks(run_seed=run_seed, count=count, cfg=cfg)
task_rows = []
requirement_rows = []
@ -208,10 +206,8 @@ def build_task_rows(*, run_seed, count, cfg=None):
return task_rows, requirement_rows
def generate_replacement_task(*, run_seed, replenish_counter, replaced_prestige, replaced_client_index=0, cfg=None, specialty_domains=None):
def generate_replacement_task(*, run_seed, replenish_counter, replaced_prestige, replaced_client_index=0, cfg, specialty_domains=None):
"""Generate a replacement task with the same prestige and client as the accepted task."""
if cfg is None:
cfg = WorldConfig()
streams = RngStreams(run_seed)
rng = streams.stream(f"replenish_{replenish_counter}")
requirements = _sample_requirements(rng, cfg, prestige=replaced_prestige, specialty_domains=specialty_domains)

View file

@ -26,8 +26,8 @@ class SeedWorldRequest:
horizon_years: int
employee_count: int
market_task_count: int
cfg: WorldConfig
start_date: datetime | None = None
cfg: WorldConfig = field(default_factory=WorldConfig)
@dataclass(frozen=True)