mirror of
https://github.com/collinear-ai/yc-bench.git
synced 2026-04-19 12:58:03 +00:00
fix sim resume
This commit is contained in:
parent
70ae316f27
commit
ebfce99643
4 changed files with 15 additions and 66 deletions
|
|
@ -167,37 +167,21 @@ def run_agent_loop(
|
|||
commands_executed = _extract_commands(result.raw_result)
|
||||
|
||||
resume_payload = result.resume_payload
|
||||
# Ignore blocked sim resume responses (ok=False means no time actually advanced)
|
||||
if resume_payload is not None and not resume_payload.get("ok", True):
|
||||
logger.info("Turn %d: sim resume was blocked (no active tasks).", turn_num)
|
||||
resume_payload = None
|
||||
if result.checkpoint_advanced and resume_payload is not None:
|
||||
logger.info("Turn %d: agent called sim resume.", turn_num)
|
||||
turns_since_resume = 0
|
||||
else:
|
||||
turns_since_resume += 1
|
||||
if command_executor is not None and turns_since_resume >= auto_advance_after_turns:
|
||||
# Only auto-advance if there are active tasks (employees working).
|
||||
# When idle (no active tasks), advancing just burns payroll with
|
||||
# zero productivity — let the agent keep planning instead.
|
||||
with db_factory() as db:
|
||||
idle_snapshot = _snapshot_state(db, company_id)
|
||||
has_active = idle_snapshot["active_tasks"] > 0
|
||||
if has_active:
|
||||
logger.info(
|
||||
"Turn %d: %d consecutive turns without sim resume; auto-advancing.",
|
||||
turn_num, turns_since_resume,
|
||||
)
|
||||
resume_payload, err = _auto_resume(command_executor)
|
||||
if err:
|
||||
logger.warning("Auto-resume failed on turn %d: %s", turn_num, err)
|
||||
else:
|
||||
turns_since_resume = 0
|
||||
logger.info(
|
||||
"Turn %d: %d consecutive turns without sim resume; auto-advancing.",
|
||||
turn_num, turns_since_resume,
|
||||
)
|
||||
resume_payload, err = _auto_resume(command_executor)
|
||||
if err:
|
||||
logger.warning("Auto-resume failed on turn %d: %s", turn_num, err)
|
||||
else:
|
||||
logger.info(
|
||||
"Turn %d: %d turns without resume but no active tasks; skipping auto-advance.",
|
||||
turn_num, turns_since_resume,
|
||||
)
|
||||
turns_since_resume = 0
|
||||
|
||||
if resume_payload is not None:
|
||||
# Query full state so the agent sees active/planned task counts
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ Your goal is to maximize company prestige and funds over the simulation horizon
|
|||
- Task completion after deadline = failure (0.8x prestige penalty, no reward, trust penalty)
|
||||
- Task cancellation = 1.2x prestige penalty per domain + trust penalty (worse than failure)
|
||||
- Employee throughput = base_rate / number_of_active_tasks_assigned
|
||||
- Time advances only when you run `yc-bench sim resume`. **Note**: `sim resume` is blocked if you have no active (dispatched) tasks — you must accept, assign, and dispatch at least one task before time can advance.
|
||||
- Time advances only when you run `yc-bench sim resume` — it jumps to the next event (task milestone at 25/50/75%, task completion, or monthly payroll). **Warning**: calling `sim resume` with no active tasks just skips to the next payroll, burning runway with zero revenue.
|
||||
- Prestige is clamped [1, 10]. Funds are in cents.
|
||||
|
||||
## Client Trust
|
||||
|
|
@ -149,13 +149,14 @@ def build_turn_context(
|
|||
if active_tasks == 0 and planned_tasks == 0:
|
||||
parts.append(
|
||||
"\n**ACTION REQUIRED**: No tasks are running. "
|
||||
"`sim resume` is BLOCKED until you have active tasks. "
|
||||
"Accept a task, assign employees to it, and dispatch it now."
|
||||
"Do NOT call `sim resume` — it will just burn payroll with zero revenue. "
|
||||
"Accept a task, assign employees to it, and dispatch it first."
|
||||
)
|
||||
elif planned_tasks > 0 and active_tasks == 0:
|
||||
parts.append(
|
||||
"\n**ACTION REQUIRED**: You have planned tasks but none are dispatched. "
|
||||
"`sim resume` is BLOCKED until you dispatch. Assign employees and dispatch now."
|
||||
"Do NOT call `sim resume` yet — dispatch first or you'll just burn payroll. "
|
||||
"Assign employees and dispatch now."
|
||||
)
|
||||
else:
|
||||
parts.append("\nDecide your next actions. Use `run_command` to execute CLI commands.")
|
||||
|
|
|
|||
|
|
@ -101,40 +101,6 @@ def sim_resume():
|
|||
error_output("No simulation found. Run `yc-bench sim init` first.")
|
||||
company = db.query(Company).filter(Company.id == sim_state.company_id).one()
|
||||
|
||||
# Block sim resume when no active tasks — advancing idle burns payroll
|
||||
from sqlalchemy import func
|
||||
from ..db.models.task import Task, TaskStatus
|
||||
active_count = db.query(func.count(Task.id)).filter(
|
||||
Task.company_id == sim_state.company_id,
|
||||
Task.status == TaskStatus.ACTIVE,
|
||||
).scalar() or 0
|
||||
|
||||
if active_count == 0:
|
||||
planned_count = db.query(func.count(Task.id)).filter(
|
||||
Task.company_id == sim_state.company_id,
|
||||
Task.status == TaskStatus.PLANNED,
|
||||
).scalar() or 0
|
||||
if planned_count > 0:
|
||||
json_output({
|
||||
"ok": False,
|
||||
"error": "BLOCKED: You have planned tasks but none are dispatched (active). "
|
||||
"Assign employees and run `yc-bench task dispatch --task-id <UUID>` "
|
||||
"before calling sim resume. Advancing time now would waste runway.",
|
||||
"active_tasks": 0,
|
||||
"planned_tasks": planned_count,
|
||||
})
|
||||
return
|
||||
else:
|
||||
json_output({
|
||||
"ok": False,
|
||||
"error": "BLOCKED: No active tasks. Advancing time with no work in progress "
|
||||
"just burns payroll. Accept a task, assign employees, dispatch it, "
|
||||
"THEN call sim resume.",
|
||||
"active_tasks": 0,
|
||||
"planned_tasks": 0,
|
||||
})
|
||||
return
|
||||
|
||||
next_event = fetch_next_event(
|
||||
db=db,
|
||||
company_id=sim_state.company_id,
|
||||
|
|
|
|||
|
|
@ -109,11 +109,9 @@ Diminishing returns: ~0.40/task at trust 0, ~0.07/task at trust 4.
|
|||
|
||||
**Why**: Cross-client decay penalizes scattering and rewards focusing on 2–3 clients.
|
||||
|
||||
## Sim Resume Blocking
|
||||
## Sim Resume When Idle
|
||||
|
||||
`sim resume` is blocked when no active tasks. Auto-advance also skips when idle.
|
||||
|
||||
**Why**: Prevents LLM from burning months of payroll while doing nothing.
|
||||
`sim resume` is allowed even with no active tasks — time moves forward regardless. Calling it while idle advances to the next payroll event, burning runway with zero revenue. The prompt warns the agent not to do this, but doesn't prevent it. If the agent ignores the warning and burns payroll, that's a valid failure mode.
|
||||
|
||||
## Agent Visibility
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue