fix sim resume

This commit is contained in:
alckasoc 2026-03-12 12:21:42 -07:00
parent 70ae316f27
commit ebfce99643
4 changed files with 15 additions and 66 deletions

View file

@ -167,37 +167,21 @@ def run_agent_loop(
commands_executed = _extract_commands(result.raw_result)
resume_payload = result.resume_payload
# Ignore blocked sim resume responses (ok=False means no time actually advanced)
if resume_payload is not None and not resume_payload.get("ok", True):
logger.info("Turn %d: sim resume was blocked (no active tasks).", turn_num)
resume_payload = None
if result.checkpoint_advanced and resume_payload is not None:
logger.info("Turn %d: agent called sim resume.", turn_num)
turns_since_resume = 0
else:
turns_since_resume += 1
if command_executor is not None and turns_since_resume >= auto_advance_after_turns:
# Only auto-advance if there are active tasks (employees working).
# When idle (no active tasks), advancing just burns payroll with
# zero productivity — let the agent keep planning instead.
with db_factory() as db:
idle_snapshot = _snapshot_state(db, company_id)
has_active = idle_snapshot["active_tasks"] > 0
if has_active:
logger.info(
"Turn %d: %d consecutive turns without sim resume; auto-advancing.",
turn_num, turns_since_resume,
)
resume_payload, err = _auto_resume(command_executor)
if err:
logger.warning("Auto-resume failed on turn %d: %s", turn_num, err)
else:
turns_since_resume = 0
logger.info(
"Turn %d: %d consecutive turns without sim resume; auto-advancing.",
turn_num, turns_since_resume,
)
resume_payload, err = _auto_resume(command_executor)
if err:
logger.warning("Auto-resume failed on turn %d: %s", turn_num, err)
else:
logger.info(
"Turn %d: %d turns without resume but no active tasks; skipping auto-advance.",
turn_num, turns_since_resume,
)
turns_since_resume = 0
if resume_payload is not None:
# Query full state so the agent sees active/planned task counts

View file

@ -59,7 +59,7 @@ Your goal is to maximize company prestige and funds over the simulation horizon
- Task completion after deadline = failure (0.8x prestige penalty, no reward, trust penalty)
- Task cancellation = 1.2x prestige penalty per domain + trust penalty (worse than failure)
- Employee throughput = base_rate / number_of_active_tasks_assigned
- Time advances only when you run `yc-bench sim resume`. **Note**: `sim resume` is blocked if you have no active (dispatched) tasks — you must accept, assign, and dispatch at least one task before time can advance.
- Time advances only when you run `yc-bench sim resume` — it jumps to the next event (task milestone at 25/50/75%, task completion, or monthly payroll). **Warning**: calling `sim resume` with no active tasks just skips to the next payroll, burning runway with zero revenue.
- Prestige is clamped [1, 10]. Funds are in cents.
## Client Trust
@ -149,13 +149,14 @@ def build_turn_context(
if active_tasks == 0 and planned_tasks == 0:
parts.append(
"\n**ACTION REQUIRED**: No tasks are running. "
"`sim resume` is BLOCKED until you have active tasks. "
"Accept a task, assign employees to it, and dispatch it now."
"Do NOT call `sim resume` — it will just burn payroll with zero revenue. "
"Accept a task, assign employees to it, and dispatch it first."
)
elif planned_tasks > 0 and active_tasks == 0:
parts.append(
"\n**ACTION REQUIRED**: You have planned tasks but none are dispatched. "
"`sim resume` is BLOCKED until you dispatch. Assign employees and dispatch now."
"Do NOT call `sim resume` yet — dispatch first or you'll just burn payroll. "
"Assign employees and dispatch now."
)
else:
parts.append("\nDecide your next actions. Use `run_command` to execute CLI commands.")

View file

@ -101,40 +101,6 @@ def sim_resume():
error_output("No simulation found. Run `yc-bench sim init` first.")
company = db.query(Company).filter(Company.id == sim_state.company_id).one()
# Block sim resume when no active tasks — advancing idle burns payroll
from sqlalchemy import func
from ..db.models.task import Task, TaskStatus
active_count = db.query(func.count(Task.id)).filter(
Task.company_id == sim_state.company_id,
Task.status == TaskStatus.ACTIVE,
).scalar() or 0
if active_count == 0:
planned_count = db.query(func.count(Task.id)).filter(
Task.company_id == sim_state.company_id,
Task.status == TaskStatus.PLANNED,
).scalar() or 0
if planned_count > 0:
json_output({
"ok": False,
"error": "BLOCKED: You have planned tasks but none are dispatched (active). "
"Assign employees and run `yc-bench task dispatch --task-id <UUID>` "
"before calling sim resume. Advancing time now would waste runway.",
"active_tasks": 0,
"planned_tasks": planned_count,
})
return
else:
json_output({
"ok": False,
"error": "BLOCKED: No active tasks. Advancing time with no work in progress "
"just burns payroll. Accept a task, assign employees, dispatch it, "
"THEN call sim resume.",
"active_tasks": 0,
"planned_tasks": 0,
})
return
next_event = fetch_next_event(
db=db,
company_id=sim_state.company_id,

View file

@ -109,11 +109,9 @@ Diminishing returns: ~0.40/task at trust 0, ~0.07/task at trust 4.
**Why**: Cross-client decay penalizes scattering and rewards focusing on 2–3 clients.
## Sim Resume Blocking
## Sim Resume When Idle
`sim resume` is blocked when no active tasks. Auto-advance also skips when idle.
**Why**: Prevents LLM from burning months of payroll while doing nothing.
`sim resume` is allowed even with no active tasks — time moves forward regardless. Calling it while idle advances to the next payroll event, burning runway with zero revenue. The prompt warns the agent not to do this, but doesn't prevent it. If the agent ignores the warning and burns payroll, that's a valid failure mode.
## Agent Visibility