mirror of
https://github.com/collinear-ai/yc-bench.git
synced 2026-04-19 12:58:03 +00:00
Capped skill rate at 10 + removed reward mult from clients
This commit is contained in:
parent
d976b9cbb4
commit
140bb58653
11 changed files with 64 additions and 148 deletions
|
|
@ -256,6 +256,7 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
|
|||
horizon_years=cfg.sim.horizon_years,
|
||||
employee_count=world_cfg.num_employees,
|
||||
market_task_count=world_cfg.num_market_tasks,
|
||||
cfg=world_cfg,
|
||||
start_date=start_dt,
|
||||
)
|
||||
result = seed_world_transactional(db, req)
|
||||
|
|
@ -333,22 +334,14 @@ def run_bot(config_name: str, seed: int, bot_slug: str, strategy_fn: StrategyFn)
|
|||
TaskRequirement.task_id == task.id
|
||||
).all()
|
||||
|
||||
# Apply trust reward multiplier and work reduction
|
||||
# Apply trust work reduction (no reward multiplier)
|
||||
if task.client_id is not None:
|
||||
from yc_bench.db.models.client import Client, ClientTrust
|
||||
client_row = db.query(Client).filter(Client.id == task.client_id).one_or_none()
|
||||
client_multiplier = client_row.reward_multiplier if client_row else 1.0
|
||||
from yc_bench.db.models.client import ClientTrust
|
||||
ct = db.query(ClientTrust).filter(
|
||||
ClientTrust.company_id == company_id,
|
||||
ClientTrust.client_id == task.client_id,
|
||||
).one_or_none()
|
||||
trust_level = float(ct.trust_level) if ct else 0.0
|
||||
trust_multiplier = (
|
||||
world_cfg.trust_base_multiplier
|
||||
+ (client_multiplier ** 2) * world_cfg.trust_reward_scale
|
||||
* (trust_level ** 2) / world_cfg.trust_max
|
||||
)
|
||||
task.reward_funds_cents = int(task.reward_funds_cents * trust_multiplier)
|
||||
work_reduction = world_cfg.trust_work_reduction_max * (trust_level / world_cfg.trust_max)
|
||||
for r in reqs:
|
||||
r.required_qty = int(float(r.required_qty) * (1 - work_reduction))
|
||||
|
|
|
|||
|
|
@ -40,19 +40,6 @@ Your goal is to maximize company prestige and funds over the simulation horizon
|
|||
- `yc-bench task cancel --task-id <UUID> --reason "text"` — cancel a task (prestige penalty: 1.2x reward delta)
|
||||
- `yc-bench sim resume` — advance simulation to the next checkpoint event and return wake events
|
||||
|
||||
## Strategy Guidelines
|
||||
|
||||
1. **Check company status first** to understand your financial position, runway, and **current prestige levels per domain**.
|
||||
2. **Browse the market at your prestige level** — use `--required-prestige-lte N` where N matches your highest prestige. Higher-prestige tasks pay significantly more (prestige-5 tasks pay ~2.2x more than prestige-1). As your prestige grows, ALWAYS increase your browse filter to find better-paying tasks.
|
||||
3. **Accept tasks** that match your capabilities and offer good reward-to-risk ratio.
|
||||
4. **Assign employees strategically** — employees split throughput across active tasks. Focus employees on fewer tasks for faster completion.
|
||||
5. **Dispatch tasks** once assigned, then continue monitoring progress/events via status and reports.
|
||||
6. **Monitor deadlines** — completing after deadline causes failure (0.8x prestige penalty). Cancel hopeless tasks early (1.2x penalty, but stops bleeding time).
|
||||
7. **Watch payroll** — monthly salaries are deducted automatically. Don't let runway drop to zero.
|
||||
8. **Use status checks** to track critical milestones and risks.
|
||||
9. **Successful tasks** award funds + prestige + employee skill boosts. Build momentum.
|
||||
10. **Scale up over time** — regularly check `yc-bench company status` to see your prestige. Browse higher-prestige tasks as you grow — staying on prestige-1 tasks when you have prestige 5+ leaves enormous revenue on the table.
|
||||
|
||||
## Key Rules
|
||||
|
||||
- Task completion at or before deadline = success (reward funds + prestige + skill boost + client trust gain)
|
||||
|
|
@ -65,31 +52,14 @@ Your goal is to maximize company prestige and funds over the simulation horizon
|
|||
## Client Trust
|
||||
|
||||
- Each task is offered by a specific **client** (e.g. "Nexus AI", "Vertex Labs").
|
||||
- Trust affects TWO things: **reward** and **work required**.
|
||||
- Each client has **specialty domains** (e.g. "research", "training"). Tasks from a client are biased toward their specialties.
|
||||
- Use `yc-bench client list` to see each client's specialties and current trust level.
|
||||
|
||||
### Client Tiers and Specialties
|
||||
- Clients are classified into **tiers**: Standard, Premium, Enterprise. Higher tiers have higher reward potential at high trust.
|
||||
- Each client has **specialty domains** (e.g. "research", "training"). Tasks from a client are biased toward their specialties — a client specializing in "research" will mostly offer research-heavy tasks.
|
||||
- Use `yc-bench client list` to see each client's tier, specialties, and current trust level.
|
||||
|
||||
### Reward Scaling
|
||||
- Listed rewards are **potential** — actual payout depends on trust. Without trust, you only receive about **50%** of the listed reward.
|
||||
- As trust grows, payouts increase significantly. Higher-tier clients scale better but start worse.
|
||||
- Observe actual payouts over time to gauge each client's true value.
|
||||
|
||||
### Work Reduction
|
||||
- **Trusted clients give clearer specs** — work required shrinks with trust (up to 40% reduction at max trust).
|
||||
- This compounds with higher rewards: at high trust, you earn more in less time.
|
||||
|
||||
### Strategy
|
||||
- **Domain alignment matters most**: Pick clients whose specialties match your company's prestige strengths. A Premium client aligned with your best domains may outperform an Enterprise client where you're weak.
|
||||
- **Focus on 2-3 clients** to build trust fast. Scattered work = perpetual low payouts + full work load.
|
||||
- **Compounding loop**: trust → less work → faster completion → more tasks per month → more trust → even better returns.
|
||||
- **Higher-tier clients are riskier early**: they pay less than Standard clients at low trust, but become very rewarding at high trust. Standard clients are safer to start with.
|
||||
### Mechanics
|
||||
- Completing tasks for a client builds **trust** [0.0–5.0]. Trust gains diminish as you approach max.
|
||||
- **Premium tasks require trust**: High-reward tasks require established trust (required_trust 1-4). Clients don't give their best projects to unproven vendors.
|
||||
- **Trust decays** daily — relationships need maintenance through continued work.
|
||||
- **Failures hurt**: -0.3 trust. **Cancellations hurt more**: -0.5 trust.
|
||||
- Trusted clients require less work (up to 35% reduction at max trust).
|
||||
- Some tasks require minimum trust to accept (required_trust 1-4).
|
||||
- Trust decays daily. Task failure and cancellation reduce trust.
|
||||
"""
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ def sim_init(
|
|||
horizon_years=horizon_years,
|
||||
employee_count=employee_count,
|
||||
market_task_count=market_task_count,
|
||||
cfg=_wc,
|
||||
start_date=start_dt,
|
||||
)
|
||||
result = seed_world_transactional(db, req)
|
||||
|
|
|
|||
|
|
@ -90,19 +90,10 @@ def task_accept(
|
|||
f"does not meet task requirement ({task.required_trust})."
|
||||
)
|
||||
|
||||
# Apply trust reward multiplier and work reduction at accept time
|
||||
# Apply trust work reduction at accept time (no reward multiplier —
|
||||
# faster completion from trust already increases revenue via throughput).
|
||||
_cfg = _get_world_cfg()
|
||||
if task.client_id is not None:
|
||||
client_row = db.query(Client).filter(Client.id == task.client_id).one_or_none()
|
||||
client_multiplier = client_row.reward_multiplier if client_row else 1.0
|
||||
# Reward: continuous formula
|
||||
trust_multiplier = (
|
||||
_cfg.trust_base_multiplier
|
||||
+ (client_multiplier ** 2) * _cfg.trust_reward_scale
|
||||
* (trust_level ** 2) / _cfg.trust_max
|
||||
)
|
||||
task.reward_funds_cents = int(task.reward_funds_cents * trust_multiplier)
|
||||
# Work reduction: trusted clients give clearer specs → less work
|
||||
work_reduction = _cfg.trust_work_reduction_max * (trust_level / _cfg.trust_max)
|
||||
for r in reqs:
|
||||
r.required_qty = int(float(r.required_qty) * (1 - work_reduction))
|
||||
|
|
|
|||
|
|
@ -5,14 +5,8 @@ from .schema import ExperimentConfig, AgentConfig, LoopConfig, SimConfig, WorldC
|
|||
|
||||
|
||||
def get_world_config() -> WorldConfig:
|
||||
"""Load WorldConfig from the active experiment (YC_BENCH_EXPERIMENT env var, default: 'default').
|
||||
|
||||
Falls back to default WorldConfig if config loading fails (e.g. outside a benchmark run).
|
||||
"""
|
||||
try:
|
||||
return load_config(os.environ.get("YC_BENCH_EXPERIMENT", "default")).world
|
||||
except Exception:
|
||||
return WorldConfig()
|
||||
"""Load WorldConfig from the active experiment (YC_BENCH_EXPERIMENT env var, default: 'default')."""
|
||||
return load_config(os.environ.get("YC_BENCH_EXPERIMENT", "default")).world
|
||||
|
||||
|
||||
__all__ = [
|
||||
|
|
|
|||
|
|
@ -63,6 +63,8 @@ market_browse_default_limit = 50
|
|||
# Salary bump per completed task — each assigned employee gets this raise.
|
||||
# Compounds over time, accelerating payroll pressure.
|
||||
salary_bump_pct = 0.01
|
||||
salary_max_cents = 10_000_000 # cap individual salary at $100K/month
|
||||
skill_rate_max = 10.0 # cap employee skill rate
|
||||
|
||||
# --- Prestige mechanics ---
|
||||
prestige_max = 10.0
|
||||
|
|
|
|||
|
|
@ -107,50 +107,44 @@ class SimConfig(BaseModel):
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
class WorldConfig(BaseModel):
|
||||
"""All world-generation parameters.
|
||||
|
||||
Tunable knobs have no defaults — each must be set explicitly in the TOML preset (derived parameters and a few fields such as `dist` still carry defaults).
|
||||
This prevents silent drift between schema.py and the preset files.
|
||||
"""
|
||||
|
||||
# --- Workforce ---
|
||||
num_employees: int = 10
|
||||
initial_funds_cents: int = 15_000_000 # $150,000
|
||||
initial_prestige_level: float = 1.0
|
||||
work_hours_per_day: float = 9.0
|
||||
num_employees: int
|
||||
initial_funds_cents: int
|
||||
initial_prestige_level: float
|
||||
work_hours_per_day: float
|
||||
|
||||
# --- Market ---
|
||||
num_market_tasks: int = 500
|
||||
market_browse_default_limit: int = 50
|
||||
num_market_tasks: int
|
||||
market_browse_default_limit: int
|
||||
|
||||
# --- Salary bump on task completion ---
|
||||
salary_bump_pct: float = 0.01 # 1% raise per assigned employee per completed task
|
||||
salary_max_cents: int = 10_000_000 # cap individual salary at $100K/month
|
||||
skill_rate_max: float = 30.0 # cap employee skill rate (prevents exponential skill compounding)
|
||||
salary_bump_pct: float
|
||||
salary_max_cents: int
|
||||
skill_rate_max: float
|
||||
|
||||
# --- Prestige mechanics ---
|
||||
prestige_max: float = 10.0
|
||||
prestige_min: float = 1.0
|
||||
penalty_fail_multiplier: float = 0.8
|
||||
penalty_cancel_multiplier: float = 1.2
|
||||
# Extra reward fraction per prestige level above 1.
|
||||
# At 0.3: prestige-8 tasks pay ~3.1x what prestige-1 tasks pay (1 + 0.3 * 7).
|
||||
reward_prestige_scale: float = 0.3
|
||||
|
||||
# Daily prestige decay per domain. Domains not exercised lose prestige
|
||||
# over time: -0.005/day → -0.15/month → untouched domain drops ~1 level
|
||||
# every ~6-7 months. Floored at prestige_min.
|
||||
prestige_decay_per_day: float = 0.005
|
||||
prestige_max: float
|
||||
prestige_min: float
|
||||
penalty_fail_multiplier: float
|
||||
penalty_cancel_multiplier: float
|
||||
reward_prestige_scale: float
|
||||
prestige_decay_per_day: float
|
||||
|
||||
# --- Client trust (intuitive knobs) ---
|
||||
num_clients: int = 8
|
||||
trust_max: float = 5.0
|
||||
# ~how many successful tasks to reach 80% of max trust with one client
|
||||
trust_build_rate: float = 20.0
|
||||
# 0-1: how punishing failures/inactivity are (0=forgiving, 1=harsh)
|
||||
trust_fragility: float = 0.5
|
||||
# 0-1: how much working for one client hurts trust with others (0=none, 1=heavy)
|
||||
trust_focus_pressure: float = 0.5
|
||||
# payout multiplier a typical Premium client (mult≈1.3) gives at max trust
|
||||
trust_reward_ceiling: float = 2.6
|
||||
# max work reduction at max trust (0.4 = 40% less work)
|
||||
trust_work_reduction_max: float = 0.40
|
||||
# fraction of tasks that require trust (~0.2 = 20%)
|
||||
trust_gating_fraction: float = 0.20
|
||||
num_clients: int
|
||||
trust_max: float
|
||||
trust_build_rate: float
|
||||
trust_fragility: float
|
||||
trust_focus_pressure: float
|
||||
trust_reward_ceiling: float
|
||||
trust_work_reduction_max: float
|
||||
trust_gating_fraction: float
|
||||
|
||||
# --- Derived trust params (computed from knobs above, do not set directly) ---
|
||||
trust_min: float = 0.0
|
||||
|
|
@ -175,46 +169,25 @@ class WorldConfig(BaseModel):
|
|||
client_tier_enterprise_threshold: float = 1.7
|
||||
task_specialty_domain_bias: float = 0.7
|
||||
|
||||
# Required qty scaling by prestige: qty *= 1 + prestige_qty_scale * (prestige - 1).
|
||||
# At 0.3: prestige-5 tasks need 2.2× the work of prestige-1 tasks.
|
||||
prestige_qty_scale: float = 0.3
|
||||
# --- Task scaling ---
|
||||
prestige_qty_scale: float
|
||||
deadline_qty_per_day: float
|
||||
deadline_min_biz_days: int
|
||||
|
||||
# --- Deadline computation ---
|
||||
deadline_qty_per_day: float = 150.0 # max per-domain qty / this = deadline days
|
||||
deadline_min_biz_days: int = 7
|
||||
|
||||
# --- Progress milestones (fraction thresholds that trigger checkpoint events) ---
|
||||
task_progress_milestones: list[float] = Field(default_factory=lambda: [0.25, 0.5, 0.75])
|
||||
# --- Progress milestones ---
|
||||
task_progress_milestones: list[float]
|
||||
|
||||
# --- Business hours ---
|
||||
workday_start_hour: int = 9
|
||||
workday_end_hour: int = 18
|
||||
workday_start_hour: int
|
||||
workday_end_hour: int
|
||||
|
||||
# --- Distributions (shape of random draws during world generation) ---
|
||||
# --- Distributions ---
|
||||
dist: WorldDists = Field(default_factory=WorldDists)
|
||||
|
||||
# --- Salary tiers ---
|
||||
salary_junior: SalaryTierConfig = Field(
|
||||
default_factory=lambda: SalaryTierConfig(
|
||||
name="junior", share=0.50,
|
||||
min_cents=200_000, max_cents=400_000,
|
||||
rate_min=1.0, rate_max=4.0,
|
||||
)
|
||||
)
|
||||
salary_mid: SalaryTierConfig = Field(
|
||||
default_factory=lambda: SalaryTierConfig(
|
||||
name="mid", share=0.35,
|
||||
min_cents=600_000, max_cents=800_000,
|
||||
rate_min=4.0, rate_max=7.0,
|
||||
)
|
||||
)
|
||||
salary_senior: SalaryTierConfig = Field(
|
||||
default_factory=lambda: SalaryTierConfig(
|
||||
name="senior", share=0.15,
|
||||
min_cents=1_000_000, max_cents=1_500_000,
|
||||
rate_min=7.0, rate_max=10.0,
|
||||
)
|
||||
)
|
||||
salary_junior: SalaryTierConfig
|
||||
salary_mid: SalaryTierConfig
|
||||
salary_senior: SalaryTierConfig
|
||||
|
||||
@model_validator(mode="after")
|
||||
def _derive_trust_params(self) -> WorldConfig:
|
||||
|
|
|
|||
|
|
@ -44,10 +44,8 @@ class GeneratedClient:
|
|||
specialty_domains: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig | None = None) -> list[GeneratedClient]:
|
||||
def generate_clients(*, run_seed: int, count: int, cfg: WorldConfig) -> list[GeneratedClient]:
|
||||
"""Generate clients with seeded reward multipliers, tiers, and specialty domains."""
|
||||
if cfg is None:
|
||||
cfg = WorldConfig()
|
||||
if count <= 0:
|
||||
return []
|
||||
if count > len(_CLIENT_NAME_POOL):
|
||||
|
|
|
|||
|
|
@ -48,9 +48,7 @@ def _sample_domain_rates(rng, min_rate, max_rate):
|
|||
return [round(rng.uniform(min_rate, max_rate), 4) for _ in range(_NUM_DOMAINS)]
|
||||
|
||||
|
||||
def generate_employees(*, run_seed, count, cfg=None):
|
||||
if cfg is None:
|
||||
cfg = WorldConfig()
|
||||
def generate_employees(*, run_seed, count, cfg):
|
||||
if count <= 0:
|
||||
return []
|
||||
|
||||
|
|
@ -83,7 +81,7 @@ def generate_employees(*, run_seed, count, cfg=None):
|
|||
return employees
|
||||
|
||||
|
||||
def build_employee_rows(*, run_seed, company_id, count, cfg=None):
|
||||
def build_employee_rows(*, run_seed, company_id, count, cfg):
|
||||
generated = generate_employees(run_seed=run_seed, count=count, cfg=cfg)
|
||||
employee_rows = []
|
||||
skill_rows = []
|
||||
|
|
|
|||
|
|
@ -150,15 +150,13 @@ def _make_task(rng, cfg, prestige, serial, requirements, client_index=0):
|
|||
)
|
||||
|
||||
|
||||
def generate_tasks(*, run_seed, count, cfg=None, client_specialties=None):
|
||||
def generate_tasks(*, run_seed, count, cfg, client_specialties=None):
|
||||
"""Generate market tasks.
|
||||
|
||||
Args:
|
||||
client_specialties: list of specialty domain lists, one per client index.
|
||||
e.g. [["research", "training"], ["inference"]] for 2 clients.
|
||||
"""
|
||||
if cfg is None:
|
||||
cfg = WorldConfig()
|
||||
if count <= 0:
|
||||
return []
|
||||
|
||||
|
|
@ -176,7 +174,7 @@ def generate_tasks(*, run_seed, count, cfg=None, client_specialties=None):
|
|||
return out
|
||||
|
||||
|
||||
def build_task_rows(*, run_seed, count, cfg=None):
|
||||
def build_task_rows(*, run_seed, count, cfg):
|
||||
generated = generate_tasks(run_seed=run_seed, count=count, cfg=cfg)
|
||||
task_rows = []
|
||||
requirement_rows = []
|
||||
|
|
@ -208,10 +206,8 @@ def build_task_rows(*, run_seed, count, cfg=None):
|
|||
return task_rows, requirement_rows
|
||||
|
||||
|
||||
def generate_replacement_task(*, run_seed, replenish_counter, replaced_prestige, replaced_client_index=0, cfg=None, specialty_domains=None):
|
||||
def generate_replacement_task(*, run_seed, replenish_counter, replaced_prestige, replaced_client_index=0, cfg, specialty_domains=None):
|
||||
"""Generate a replacement task with the same prestige and client as the accepted task."""
|
||||
if cfg is None:
|
||||
cfg = WorldConfig()
|
||||
streams = RngStreams(run_seed)
|
||||
rng = streams.stream(f"replenish_{replenish_counter}")
|
||||
requirements = _sample_requirements(rng, cfg, prestige=replaced_prestige, specialty_domains=specialty_domains)
|
||||
|
|
|
|||
|
|
@ -26,8 +26,8 @@ class SeedWorldRequest:
|
|||
horizon_years: int
|
||||
employee_count: int
|
||||
market_task_count: int
|
||||
cfg: WorldConfig
|
||||
start_date: datetime | None = None
|
||||
cfg: WorldConfig = field(default_factory=WorldConfig)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue