diff --git a/src/yc_bench/agent/loop.py b/src/yc_bench/agent/loop.py
index ed10f11..6498a8b 100644
--- a/src/yc_bench/agent/loop.py
+++ b/src/yc_bench/agent/loop.py
@@ -116,6 +116,8 @@ def run_agent_loop(
     command_executor=None,
     auto_advance_after_turns: int = 10,
     max_turns: int | None = None,
+    on_turn_start=None,
+    on_turn=None,
 ) -> RunState:
     run_state.start()
     turns_since_resume = 0  # consecutive turns without sim resume
@@ -146,6 +148,9 @@ def run_agent_loop(
                 **snapshot,
             )
 
+        if on_turn_start is not None:
+            on_turn_start(turn_num)
+
         try:
             result = runtime.run_turn(
                 RuntimeTurnRequest(
@@ -195,6 +200,11 @@ def run_agent_loop(
             turn_cost_usd=getattr(result, "turn_cost_usd", 0.0),
         )
 
+        if on_turn is not None:
+            with db_factory() as db:
+                post_snapshot = _snapshot_state(db, company_id)
+            on_turn(post_snapshot, run_state, commands_executed)
+
         logger.info(
             "Turn %d complete. Agent output length: %d, commands: %d",
             turn_num, len(agent_output), len(commands_executed),
diff --git a/src/yc_bench/cli/__init__.py b/src/yc_bench/cli/__init__.py
index f158230..cdd6c25 100644
--- a/src/yc_bench/cli/__init__.py
+++ b/src/yc_bench/cli/__init__.py
@@ -95,6 +95,7 @@ def run_command_cli(
         "default", "--config",
         help="Preset name ('default', 'fast_test', 'high_reward') or path to a .toml file",
     ),
+    no_live: bool = typer.Option(False, "--no-live", help="Disable the live terminal dashboard"),
 ):
     """Run a full benchmark: migrate DB, seed world, run agent loop to completion."""
     from dotenv import find_dotenv, load_dotenv
@@ -109,6 +110,7 @@ def run_command_cli(
         company_name=company_name,
         start_date=start_date,
         config_name=config_name,
+        no_live=no_live,
     )
 
     raise SystemExit(run_benchmark(args))
diff --git a/src/yc_bench/runner/args.py b/src/yc_bench/runner/args.py
index 2220fe3..d8bd705 100644
--- a/src/yc_bench/runner/args.py
+++ b/src/yc_bench/runner/args.py
@@ -12,6 +12,7 @@ class RunArgs:
     company_name: str
     start_date: str
     config_name: str = "default"
+    no_live: bool = False
 
 def build_parser():
     parser = argparse.ArgumentParser(
@@ -27,6 +28,10 @@ def build_parser():
         "--config", dest="config_name", default="default",
         help="Preset name ('default', 'fast_test', 'high_reward') or path to a .toml file",
     )
+    parser.add_argument(
+        "--no-live", action="store_true", default=False,
+        help="Disable the live terminal dashboard (show raw log output instead)",
+    )
     return parser
 
 def parse_run_args(argv):
@@ -40,6 +45,7 @@ def parse_run_args(argv):
         company_name=ns.company_name,
         start_date=ns.start_date,
         config_name=ns.config_name,
+        no_live=ns.no_live,
     )
 
 def _validate(ns, parser):
diff --git a/src/yc_bench/runner/dashboard.py b/src/yc_bench/runner/dashboard.py
new file mode 100644
index 0000000..b265955
--- /dev/null
+++ b/src/yc_bench/runner/dashboard.py
@@ -0,0 +1,497 @@
+"""Live terminal dashboard for YC-Bench using Rich."""
+from __future__ import annotations
+
+import logging
+import math
+import os
+import time
+from dataclasses import dataclass, field
+from decimal import Decimal
+from typing import Any
+
+from rich.console import Console, Group
+from rich.live import Live
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+
+logger = logging.getLogger(__name__)
+
+SPARK_CHARS = "▁▂▃▄▅▆▇█"
+
+# Domain → (display name, color) for styled inline display
+DOMAIN_STYLE = {
+    "system": ("System", "bright_cyan"),
+    "research": ("Research", "bright_magenta"),
+    "data": ("Data", "bright_blue"),
+    "frontend": ("Frontend", "bright_yellow"),
+    "backend": ("Backend", "bright_green"),
+    "training": ("Training", "red"),
+    "hardware": ("Hardware", "white"),
+}
+
+
+def _sparkline(values: list[float], width: int = 20) -> str:
+    """Return a Unicode sparkline of the last *width* values ("" if empty)."""
+    if not values:
+        return ""
+    vals = values[-width:]
+    lo, hi = min(vals), max(vals)
+    span = hi - lo if hi != lo else 1.0  # flat series: avoid dividing by zero
+    top = len(SPARK_CHARS) - 1
+    return "".join(SPARK_CHARS[min(int((v - lo) / span * top), top)] for v in vals)
+
+
+def _fmt_dollars(cents: int) -> str:
+    """Format integer cents as dollars, sign first: ``$1,234.50`` / ``-$5.00``."""
+    sign = "-" if cents < 0 else ""
+    return f"{sign}${abs(cents) / 100:,.2f}"
+
+
+def _fmt_delta(cents: int) -> str:
+    """Format a change in cents as signed whole dollars, e.g. ``+$1,200``."""
+    sign = "+" if cents >= 0 else "-"
+    return f"{sign}${abs(cents) / 100:,.0f}"
+
+
+def _domain_tag(domain_str: str) -> str:
+    """Return a Rich-colored label for a domain.
+
+    Known domains use the name and color from DOMAIN_STYLE; unknown ones fall
+    back to their first three letters, upper-cased, in white.
+    """
+    label, color = DOMAIN_STYLE.get(domain_str, (domain_str[:3].upper(), "white"))
+    return f"[{color}]{label}[/{color}]"
+
+
+def _mini_bar(pct: float, width: int = 8) -> str:
+    """Colored progress bar: green when done, yellow >= 50%, red below.
+
+    *pct* is clamped to [0, 1] so out-of-range input cannot change the bar width.
+    """
+    pct = max(0.0, min(pct, 1.0))
+    if pct >= 1.0:
+        return f"[bold green]{'=' * width}[/bold green]"
+    filled = int(pct * width)
+    color = "yellow" if pct >= 0.5 else "red"
+    return f"[{color}]{'=' * filled}[/{color}][dim]{'.' * (width - filled)}[/dim]"
+
+
+@dataclass
+class TaskInfo:
+    """Display data for one active or planned task."""
+
+    title: str
+    status: str
+    prestige: int
+    reward_dollars: float
+    deadline: str
+    domains: list[str]
+    progress: list[tuple[str, float, float]]  # [(domain, completed, required)]
+
+
+@dataclass
+class EmployeeInfo:
+    """Display data for one employee."""
+
+    name: str
+    salary_dollars: float
+    skills: list[tuple[str, float]]  # [(domain, rate)]
+
+
+@dataclass
+class DashboardState:
+    """Everything the dashboard renders; mutated in place between refreshes."""
+
+    model: str = ""
+    seed: int = 0
+    config_name: str = ""
+    turn: int = 0
+    sim_date: str = ""
+    horizon_end: str = ""
+    funds_cents: int = 0
+    funds_delta_cents: int = 0
+    funds_history: list[float] = field(default_factory=list)
+    runway_months: float = 0.0
+    active_tasks: int = 0
+    planned_tasks: int = 0
+    employee_count: int = 0
+    monthly_payroll_cents: int = 0
+    api_cost_usd: float = 0.0
+    turn_time_sec: float = 0.0
+    last_action: str = ""
+    status: str = ""
+    elapsed_sec: float = 0.0
+    tasks_detail: list[TaskInfo] = field(default_factory=list)
+    employees_detail: list[EmployeeInfo] = field(default_factory=list)
+    completed_count: int = 0
+    failed_count: int = 0
+
+
+def _query_detailed_snapshot(db_factory, company_id) -> dict[str, Any]:
+    """Query rich task/employee details from the DB for dashboard display.
+
+    Returns a dict with keys ``tasks_detail``, ``employees_detail``,
+    ``completed_count`` and ``failed_count``.
+    """
+    from sqlalchemy import func
+
+    from ..db.models.task import Task, TaskStatus, TaskRequirement
+    from ..db.models.employee import Employee, EmployeeSkillRate
+
+    with db_factory() as db:
+        tasks_detail = []
+        for status in (TaskStatus.ACTIVE, TaskStatus.PLANNED):
+            tasks = db.query(Task).filter(
+                Task.company_id == company_id,
+                Task.status == status,
+            ).all()
+            for t in tasks:
+                reqs = db.query(TaskRequirement).filter(
+                    TaskRequirement.task_id == t.id,
+                ).all()
+                domains = [r.domain.value for r in reqs]
+                progress = [
+                    (r.domain.value, float(r.completed_qty), float(r.required_qty))
+                    for r in reqs
+                ]
+                deadline_str = t.deadline.strftime("%Y-%m-%d") if t.deadline else "-"
+                tasks_detail.append(TaskInfo(
+                    title=t.title,
+                    status=status.value,
+                    prestige=t.required_prestige,
+                    reward_dollars=t.reward_funds_cents / 100.0,
+                    deadline=deadline_str,
+                    domains=domains,
+                    progress=progress,
+                ))
+
+        completed_count = db.query(func.count(Task.id)).filter(
+            Task.company_id == company_id,
+            Task.status == TaskStatus.COMPLETED_SUCCESS,
+        ).scalar() or 0
+        failed_count = db.query(func.count(Task.id)).filter(
+            Task.company_id == company_id,
+            Task.status == TaskStatus.COMPLETED_FAIL,
+        ).scalar() or 0
+
+        employees_detail = []
+        employees = db.query(Employee).filter(
+            Employee.company_id == company_id,
+        ).all()
+        for emp in employees:
+            skills = db.query(EmployeeSkillRate).filter(
+                EmployeeSkillRate.employee_id == emp.id,
+            ).all()
+            skill_list = [
+                (s.domain.value, float(s.rate_domain_per_hour))
+                for s in sorted(skills, key=lambda s: float(s.rate_domain_per_hour), reverse=True)
+            ]
+            employees_detail.append(EmployeeInfo(
+                name=emp.name,
+                salary_dollars=emp.salary_cents / 100.0,
+                skills=skill_list,
+            ))
+
+    return {
+        "tasks_detail": tasks_detail,
+        "employees_detail": employees_detail,
+        "completed_count": completed_count,
+        "failed_count": failed_count,
+    }
+
+
+class BenchmarkDashboard:
+    """Rich Live dashboard for benchmark progress."""
+
+    def __init__(self, model: str, seed: int, config_name: str,
+                 db_factory=None, company_id=None):
+        self._console = Console()
+        self._live: Live | None = None
+        self._state = DashboardState(model=model, seed=seed, config_name=config_name)
+        self._start_time = time.monotonic()
+        self._turn_start_time = 0.0
+        # None until the first update(), so turn 1 does not report the whole
+        # starting balance as a gain.
+        self._prev_funds_cents: int | None = None
+        self._db_factory = db_factory
+        self._company_id = company_id
+        self._stderr_backup = None
+        self._devnull = None
+
+    def start(self) -> None:
+        """Silence stderr and start the full-screen Live display (idempotent)."""
+        import sys
+
+        if self._live is not None:
+            return
+        self._start_time = time.monotonic()
+        self._state.status = "[dim]Starting...[/dim]"
+        self._stderr_backup = sys.stderr
+        self._devnull = open(os.devnull, "w")
+        sys.stderr = self._devnull
+        try:
+            self._live = Live(
+                self._render(),
+                console=self._console,
+                refresh_per_second=2,
+                screen=True,
+            )
+            self._live.start()
+        except Exception:
+            # Never leave stderr pointing at os.devnull if Rich fails to start.
+            self.stop()
+            raise
+
+    def stop(self) -> None:
+        """Stop the Live display and restore stderr; safe to call repeatedly."""
+        import sys
+
+        if self._live is not None:
+            self._live.stop()
+            self._live = None
+        if self._stderr_backup is not None:
+            sys.stderr = self._stderr_backup
+            self._stderr_backup = None
+        if self._devnull is not None:
+            self._devnull.close()
+            self._devnull = None
+
+    def mark_turn_start(self, turn_num: int) -> None:
+        """Record the start of turn *turn_num* and show a waiting status."""
+        self._turn_start_time = time.monotonic()
+        self._state.turn = turn_num
+        self._state.status = f"[yellow]>> Turn {turn_num}: waiting for LLM...[/yellow]"
+        self._state.elapsed_sec = time.monotonic() - self._start_time
+        self._refresh()
+
+    def update(self, snapshot: dict[str, Any], run_state: Any, commands: list[str] | None = None) -> None:
+        """Fold a post-turn snapshot and run state into the display and refresh."""
+        now = time.monotonic()
+        s = self._state
+
+        s.turn = run_state.turn_count
+        s.sim_date = (snapshot.get("sim_time") or "")[:10]
+        s.horizon_end = (snapshot.get("horizon_end") or "")[:10]
+        s.funds_cents = snapshot.get("funds_cents", 0)
+        if self._prev_funds_cents is None:
+            s.funds_delta_cents = 0  # no previous turn to compare against
+        else:
+            s.funds_delta_cents = s.funds_cents - self._prev_funds_cents
+        self._prev_funds_cents = s.funds_cents
+        s.funds_history.append(s.funds_cents / 100.0)
+        s.active_tasks = snapshot.get("active_tasks", 0)
+        s.planned_tasks = snapshot.get("planned_tasks", 0)
+        s.employee_count = snapshot.get("employee_count", 0)
+        s.monthly_payroll_cents = snapshot.get("monthly_payroll_cents", 0)
+        s.api_cost_usd = run_state.total_cost_usd
+        s.turn_time_sec = now - self._turn_start_time if self._turn_start_time else 0.0
+        s.elapsed_sec = now - self._start_time
+
+        if s.monthly_payroll_cents > 0:
+            s.runway_months = s.funds_cents / s.monthly_payroll_cents
+        else:
+            s.runway_months = float("inf")
+
+        if commands:
+            first = commands[0].split(" -> ")[0]
+            if len(commands) > 1:
+                s.last_action = f"{first} (+{len(commands)-1} more)"
+            else:
+                s.last_action = first
+        else:
+            s.last_action = "(no commands)"
+
+        if run_state.terminal:
+            reason = run_state.terminal_reason.value if run_state.terminal_reason else "unknown"
+            s.status = f"[bold green]DONE: {reason}[/bold green]"
+        else:
+            s.status = f"[green]Turn {s.turn} complete[/green]"
+
+        if self._db_factory is not None and self._company_id is not None:
+            try:
+                detail = _query_detailed_snapshot(self._db_factory, self._company_id)
+            except Exception:
+                # Best effort: keep the previous detail panels, but leave a trace
+                # (the runner routes logging to a file while the dashboard is live).
+                logger.warning("Dashboard detail query failed", exc_info=True)
+            else:
+                s.tasks_detail = detail["tasks_detail"]
+                s.employees_detail = detail["employees_detail"]
+                s.completed_count = detail["completed_count"]
+                s.failed_count = detail["failed_count"]
+
+        self._refresh()
+
+    def print_final_summary(self, run_state: Any) -> None:
+        """Print a one-panel summary of the finished run to the console."""
+        s = self._state
+        elapsed_m, elapsed_s = divmod(int(s.elapsed_sec), 60)
+        elapsed_h, elapsed_m = divmod(elapsed_m, 60)
+
+        table = Table(show_header=False, box=None, padding=(0, 2))
+        table.add_column(style="bold cyan", width=14)
+        table.add_column()
+
+        table.add_row("Turns", str(s.turn))
+        table.add_row("Final Funds", _fmt_dollars(s.funds_cents))
+        table.add_row("Tasks", f"[green]{s.completed_count} done[/green] / [red]{s.failed_count} failed[/red]")
+        table.add_row("API Cost", f"${s.api_cost_usd:.4f}")
+        table.add_row("Elapsed", f"{elapsed_h}h {elapsed_m:02d}m {elapsed_s:02d}s")
+        reason = run_state.terminal_reason.value if run_state.terminal_reason else "max_turns"
+        table.add_row("Outcome", reason)
+
+        panel = Panel(
+            table,
+            title="[bold]YC-Bench Complete[/bold]",
+            border_style="green" if reason == "horizon_end" else "red" if reason == "bankruptcy" else "yellow",
+        )
+        self._console.print(panel)
+
+    def _refresh(self) -> None:
+        if self._live is not None:
+            self._live.update(self._render())
+
+    # ------------------------------------------------------------------
+    # Render helpers
+    # ------------------------------------------------------------------
+
+    def _render_stats_panel(self) -> Panel:
+        s = self._state
+        elapsed_m, elapsed_s = divmod(int(s.elapsed_sec), 60)
+        elapsed_h, elapsed_m = divmod(elapsed_m, 60)
+        short_model = s.model.rsplit("/", 1)[-1]
+
+        table = Table(show_header=False, box=None, padding=(0, 1))
+        table.add_column(style="bold cyan", width=12)
+        table.add_column(overflow="ellipsis", no_wrap=True)
+
+        table.add_row("Model", f"[bold]{short_model}[/bold] seed={s.seed} {s.config_name}")
+        table.add_row("Turn", f"[bold white]{s.turn}[/bold white]")
+        table.add_row("Sim Date", f"{s.sim_date} [dim]->[/dim] {s.horizon_end}" if s.sim_date else "[dim]--[/dim]")
+        table.add_row("Elapsed", f"{elapsed_h}h {elapsed_m:02d}m {elapsed_s:02d}s")
+
+        # Funds with colored sparkline
+        spark = _sparkline(s.funds_history)
+        delta_color = "green" if s.funds_delta_cents >= 0 else "red"
+        if s.turn > 0:
+            funds_str = f"[bold]{_fmt_dollars(s.funds_cents)}[/bold] [{delta_color}]{_fmt_delta(s.funds_delta_cents)}[/{delta_color}] [{delta_color}]{spark}[/{delta_color}]"
+        else:
+            funds_str = "[dim]--[/dim]"
+        table.add_row("Funds", funds_str)
+
+        # Runway with urgency coloring
+        if math.isinf(s.runway_months):
+            runway_str = "[green]unlimited[/green]"
+        elif s.runway_months < 2:
+            runway_str = f"[bold red blink]{s.runway_months:.1f}mo CRITICAL[/bold red blink]"
+        elif s.runway_months < 4:
+            runway_str = f"[bold yellow]{s.runway_months:.1f}mo LOW[/bold yellow]"
+        else:
+            runway_str = f"[green]{s.runway_months:.1f}mo[/green]"
+        table.add_row("Runway", runway_str)
+
+        # Task scoreboard
+        task_parts = f"{s.active_tasks} active / {s.planned_tasks} queued"
+        if s.completed_count or s.failed_count:
+            task_parts += f" [green]{s.completed_count} done[/green] [red]{s.failed_count} fail[/red]"
+        table.add_row("Tasks", task_parts)
+
+        table.add_row("Team", f"{s.employee_count} people {_fmt_dollars(s.monthly_payroll_cents)}/mo" if s.monthly_payroll_cents else str(s.employee_count))
+        table.add_row("Cost", f"${s.api_cost_usd:.4f} ({s.turn_time_sec:.1f}s/turn)" if s.turn_time_sec else f"${s.api_cost_usd:.4f}")
+        table.add_row("Action", s.last_action or "[dim]--[/dim]")
+        table.add_row("Status", s.status)
+
+        return Panel(table, title="[bold]YC-Bench[/bold]", border_style="blue")
+
+    def _render_tasks_panel(self) -> Panel:
+        s = self._state
+
+        if not s.tasks_detail:
+            return Panel(
+                "[dim]No active or planned tasks yet...[/dim]",
+                title="[bold]Tasks[/bold]",
+                border_style="yellow",
+            )
+
+        table = Table(box=None, padding=(0, 1), show_edge=False)
+        table.add_column("", width=2)  # status marker
+        table.add_column("Task", style="bold white", no_wrap=True, max_width=20)
+        table.add_column("$$$", width=8, justify="right", no_wrap=True)  # reward
+        table.add_column("Due", width=10, no_wrap=True)  # deadline
+        table.add_column("Progress", no_wrap=True, overflow="ellipsis", ratio=1)
+
+        for t in s.tasks_detail[:6]:
+            if t.status == "active":
+                marker = "[bold green]>>[/bold green]"
+            else:
+                marker = "[dim]..[/dim]"
+
+            # Reward colored by size
+            if t.reward_dollars >= 50000:
+                reward = f"[bold green]${t.reward_dollars:,.0f}[/bold green]"
+            elif t.reward_dollars >= 20000:
+                reward = f"[green]${t.reward_dollars:,.0f}[/green]"
+            else:
+                reward = f"${t.reward_dollars:,.0f}"
+
+            # Domain progress with colored bars
+            prog_parts = []
+            for domain, completed, required in t.progress:
+                pct = completed / required if required > 0 else 0
+                bar = _mini_bar(pct, width=6)
+                tag = _domain_tag(domain)
+                prog_parts.append(f"{tag} {bar}")
+            progress_str = " ".join(prog_parts)
+
+            table.add_row(marker, t.title[:20], reward, t.deadline, progress_str)
+
+        remaining = len(s.tasks_detail) - 6
+        if remaining > 0:
+            table.add_row("", f"[dim]+{remaining} more[/dim]", "", "", "")
+
+        return Panel(table, title="[bold]Tasks[/bold]", border_style="yellow")
+
+    def _render_team_panel(self) -> Panel:
+        s = self._state
+
+        if not s.employees_detail:
+            return Panel("[dim]No employees hired yet...[/dim]", title="[bold]Team[/bold]", border_style="magenta")
+
+        table = Table(box=None, padding=(0, 1), show_edge=False)
+        table.add_column("Name", style="bold white", width=14, no_wrap=True)
+        table.add_column("Pay", width=8, justify="right", no_wrap=True)
+        table.add_column("Skills", no_wrap=True, overflow="ellipsis", ratio=1)
+
+        for emp in s.employees_detail:
+            # Salary colored by cost
+            if emp.salary_dollars >= 10000:
+                pay = f"[bold red]${emp.salary_dollars:,.0f}[/bold red]"
+            elif emp.salary_dollars >= 5000:
+                pay = f"[yellow]${emp.salary_dollars:,.0f}[/yellow]"
+            else:
+                pay = f"[green]${emp.salary_dollars:,.0f}[/green]"
+
+            # Skill bars — top 3
+            skill_parts = []
+            for d, r in emp.skills[:3]:
+                tag = _domain_tag(d)
+                # Rate bar: scale 0-15 to a mini bar
+                bar_pct = min(r / 15.0, 1.0)
+                bar = _mini_bar(bar_pct, width=4)
+                skill_parts.append(f"{tag}{bar}")
+            skills_str = " ".join(skill_parts)
+
+            table.add_row(emp.name[:14], pay, skills_str)
+
+        return Panel(table, title="[bold]Team[/bold]", border_style="magenta")
+
+    def _render(self) -> Group:
+        return Group(
+            self._render_stats_panel(),
+            self._render_tasks_panel(),
+            self._render_team_panel(),
+        )
+
+
+__all__ = ["BenchmarkDashboard", "DashboardState"]
diff --git a/src/yc_bench/runner/main.py b/src/yc_bench/runner/main.py
index 5bc4953..8883b44 100644
--- a/src/yc_bench/runner/main.py
+++ b/src/yc_bench/runner/main.py
@@ -4,6 +4,7 @@ from __future__ import annotations
 import json
 import logging
 import os
+import sys
 from contextlib import contextmanager
 from datetime import datetime, timezone
 from pathlib import Path
@@ -18,6 +19,9 @@ from .args import parse_run_args
 
 logger = logging.getLogger(__name__)
 
+# Loggers that produce noisy debug output during LLM calls
+_NOISY_LOGGERS = ("litellm", "httpx", "httpcore", "openai", "LiteLLM")
+
 
 def _parse_date(date_str: str) -> datetime:
     """Accept ISO (2025-01-01) or legacy MM/DD/YYYY format."""
@@ -119,6 +123,33 @@ def _init_simulation(db_factory, args, experiment_cfg, horizon_years):
 # Main
 # ---------------------------------------------------------------------------
 
+def _redirect_all_logging_to_file(log_file: Path) -> None:
+    """Redirect ALL logging from the console to a file.
+
+    When the Rich Live dashboard is active, any output to stdout/stderr
+    breaks the in-place rendering, causing stacked panels. This removes
+    the root logger's console handlers and replaces them with a file handler.
+    """
+    log_file.parent.mkdir(parents=True, exist_ok=True)
+    file_handler = logging.FileHandler(str(log_file), mode="a")
+    file_handler.setFormatter(logging.Formatter(
+        "[%(asctime)s] %(name)s %(levelname)s: %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    ))
+
+    # Replace all console handlers on root logger with the file handler
+    root = logging.getLogger()
+    root.handlers.clear()
+    root.addHandler(file_handler)
+
+    # Also ensure noisy loggers don't propagate (belt and suspenders)
+    for name in _NOISY_LOGGERS:
+        lg = logging.getLogger(name)
+        lg.propagate = False
+        lg.handlers.clear()
+        lg.addHandler(file_handler)
+
+
 def run_benchmark(args):
     """Run a full benchmark: migrate, seed, loop until terminal."""
     logging.basicConfig(
@@ -139,6 +170,15 @@ def run_benchmark(args):
     # --horizon-years CLI flag overrides config; fall back to sim.horizon_years from config
     horizon_years = args.horizon_years if args.horizon_years is not None else experiment_cfg.sim.horizon_years
 
+    # Decide whether to use the live dashboard
+    use_live = sys.stdout.isatty() and not getattr(args, "no_live", False)
+
+    # When using the live dashboard, redirect all logging to file immediately
+    # so no console output interferes with Rich Live rendering.
+    if use_live:
+        log_file = Path("logs") / "debug.log"
+        _redirect_all_logging_to_file(log_file)
+
     logger.info(
         "YC-Bench starting: experiment=%s model=%s seed=%d horizon=%dy",
         experiment_cfg.name, args.model, args.seed, horizon_years,
@@ -187,19 +227,54 @@ def run_benchmark(args):
         horizon_years=horizon_years,
     )
 
-    # 6. Run agent loop
-    loop_cfg = experiment_cfg.loop
-    final_state = run_agent_loop(
-        runtime=runtime,
-        db_factory=db_factory,
-        company_id=company_id,
-        run_state=run_state,
-        command_executor=run_command,
-        auto_advance_after_turns=loop_cfg.auto_advance_after_turns,
-        max_turns=loop_cfg.max_turns,
-    )
+    # 6. Set up live dashboard (or not)
+    dashboard = None
+    on_turn_start = None
+    on_turn = None
 
-    # 7. Save full rollout (with transcript) and print summary
+    if use_live:
+        from .dashboard import BenchmarkDashboard
+
+        dashboard = BenchmarkDashboard(
+            model=args.model,
+            seed=args.seed,
+            config_name=args.config_name,
+            db_factory=db_factory,
+            company_id=company_id,
+        )
+
+        def on_turn_start(turn_num):
+            dashboard.mark_turn_start(turn_num)
+
+        def on_turn(snapshot, rs, commands):
+            dashboard.update(snapshot, rs, commands)
+
+    # 7. Run agent loop
+    loop_cfg = experiment_cfg.loop
+    try:
+        if dashboard is not None:
+            dashboard.start()
+
+        final_state = run_agent_loop(
+            runtime=runtime,
+            db_factory=db_factory,
+            company_id=company_id,
+            run_state=run_state,
+            command_executor=run_command,
+            auto_advance_after_turns=loop_cfg.auto_advance_after_turns,
+            max_turns=loop_cfg.max_turns,
+            on_turn_start=on_turn_start,
+            on_turn=on_turn,
+        )
+    finally:
+        if dashboard is not None:
+            dashboard.stop()
+
+    # 8. Print final summary
+    if dashboard is not None:
+        dashboard.print_final_summary(final_state)
+
+    # 9. Save full rollout (with transcript) and print summary
     rollout = final_state.full_rollout()
     summary = final_state.summary()
     logger.info("Run complete: %s", json.dumps(summary, indent=2))
diff --git a/start.sh b/start.sh
index 230d4c3..86a6402 100755
--- a/start.sh
+++ b/start.sh
@@ -3,9 +3,10 @@ set -e
 
 # ── If stdin is not a terminal (piped via curl), re-download & re-exec ──
 if [ ! -t 0 ]; then
+    rm -f /tmp/yc_bench_start.*.sh 2>/dev/null || true
     SELF=$(mktemp /tmp/yc_bench_start.XXXXXX.sh)
     curl -sSL https://raw.githubusercontent.com/collinear-ai/yc-bench/main/start.sh -o "$SELF"
-    exec bash "$SELF"
+    exec bash "$SELF" /dev/null; then
-    DIR=$(mktemp -d)
-    echo "Cloning yc-bench into $DIR/yc-bench..."
-    git clone --depth 1 https://github.com/collinear-ai/yc-bench.git "$DIR/yc-bench"
-    cd "$DIR/yc-bench"
+    DIR="$HOME/Downloads/yc-bench"
+    if [ -d "$DIR/.git" ]; then
+        echo "Updating existing yc-bench in $DIR..."
+        git -C "$DIR" pull --ff-only 2>/dev/null || true
+    else
+        echo "Cloning yc-bench into $DIR..."
+        git clone --depth 1 https://github.com/collinear-ai/yc-bench.git "$DIR"
+    fi
+    cd "$DIR"
 fi
 
 # ── Install deps & launch ───────────────────────────────────────────────