From d829b07e6068f37af5df307fd8292a683f77e707 Mon Sep 17 00:00:00 2001 From: alckasoc Date: Fri, 20 Mar 2026 06:01:04 -0700 Subject: [PATCH] update prompt --- .gitignore | 1 + run_all_models.sh | 8 -------- src/yc_bench/agent/prompt.py | 4 ++-- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 8d85e21..eed6c30 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,4 @@ agent.md # Claude session context — local only CLAUDE.md +old/ diff --git a/run_all_models.sh b/run_all_models.sh index 0703c1a..1668549 100755 --- a/run_all_models.sh +++ b/run_all_models.sh @@ -45,14 +45,6 @@ echo "Seeds: $SEEDS" echo "Models: ${#ALL_MODELS[@]}" echo "" -# Run greedy bot baseline first -echo "--- Running greedy bot baseline ---" -for seed in $SEEDS; do - echo " greedy_bot | $CONFIG seed=$seed" - uv run python scripts/bot_runner.py --bot greedy --config "$CONFIG" --seed "$seed" -done -echo "" - # Run all LLM models for model in "${ALL_MODELS[@]}"; do for seed in $SEEDS; do diff --git a/src/yc_bench/agent/prompt.py b/src/yc_bench/agent/prompt.py index 5e78a50..77470b8 100644 --- a/src/yc_bench/agent/prompt.py +++ b/src/yc_bench/agent/prompt.py @@ -18,7 +18,7 @@ All actions use `yc-bench` CLI commands via `run_command`. All return JSON. Run multiple tasks concurrently when possible. Accept → assign → dispatch a second task before calling sim resume. -**Use `yc-bench scratchpad write`** to save strategy notes — your conversation history is truncated after 20 turns, but scratchpad persists in the system prompt. Write rules, not events (e.g. "assign Emp_1,Emp_4,Emp_7 for inference tasks" not "Task-42 failed"). +**Use `yc-bench scratchpad write`** to save strategy notes — your conversation history is truncated after 20 turns, but scratchpad persists in the system prompt. Write reusable rules, not one-off observations. ## Commands @@ -43,7 +43,7 @@ Run multiple tasks concurrently when possible. Accept → assign → dispatch a ## Key Mechanics -- **Salary bumps**: completed tasks raise salary for every assigned employee. Assigning all 8 to every task compounds payroll until it exceeds revenue — assign 3-4 domain specialists instead. +- **Salary bumps**: completed tasks raise salary for every assigned employee. More employees assigned = higher payroll growth. - **Throughput split**: employees on multiple active tasks split their rate (rate/sqrt(N)). Two tasks run at ~71% each. - **Deadlines**: success before deadline = reward + prestige. Failure = prestige penalty, no reward. - **Trust**: completing tasks for a client builds trust → less work per task, access to gated tasks. Working for one client erodes trust with others.