Add live terminal dashboard with Rich

Replace scrolling LiteLLM debug logs with an in-place Rich Live dashboard that shows key metrics after each turn: funds sparkline, task progress bars with colored domain labels, team skill bars, runway urgency, and more.

- New: src/yc_bench/runner/dashboard.py (BenchmarkDashboard, DashboardState)
- Add on_turn/on_turn_start callbacks to agent loop
- Auto-detect TTY, redirect all logging to logs/debug.log when live
- Add --no-live flag to disable dashboard and get old log output
- Use alternate screen buffer (screen=True) for clean rendering
- Fix start.sh: clean up stale temp files before mktemp

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-26 17:13:13 +00:00 · 2026-02-26 22:11:55 -08:00 · 2026-02-26 22:11:55 -08:00 · f25a2be1e4
commit f25a2be1e4
parent d4ce0a1e5a
6 changed files with 570 additions and 17 deletions
--- a/src/yc_bench/runner/args.py
+++ b/src/yc_bench/runner/args.py
@ -12,6 +12,7 @@ class RunArgs:
    company_name: str
    start_date: str
    config_name: str = "default"
+    no_live: bool = False

 def build_parser():
    parser = argparse.ArgumentParser(
@ -27,6 +28,10 @@ def build_parser():
        "--config", dest="config_name", default="default",
        help="Preset name ('default', 'fast_test', 'high_reward') or path to a .toml file",
    )
+    parser.add_argument(
+        "--no-live", action="store_true", default=False,
+        help="Disable the live terminal dashboard (show raw log output instead)",
+    )
    return parser

 def parse_run_args(argv):
@ -40,6 +45,7 @@ def parse_run_args(argv):
        company_name=ns.company_name,
        start_date=ns.start_date,
        config_name=ns.config_name,
+        no_live=ns.no_live,
    )

 def _validate(ns, parser):
--- a/src/yc_bench/runner/dashboard.py
+++ b/src/yc_bench/runner/dashboard.py
@ -0,0 +1,454 @@
+"""Live terminal dashboard for YC-Bench using Rich."""
+from __future__ import annotations
+
+import os
+import time
+from dataclasses import dataclass, field
+from decimal import Decimal
+from typing import Any
+
+from rich.console import Console, Group
+from rich.live import Live
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+
+
+# Block glyphs used to draw sparklines, ordered from the lowest level to the
+# highest; a value is mapped to a glyph by its index in this string.
+SPARK_CHARS = "▁▂▃▄▅▆▇█"
+
+# Domain → (display name, color) for styled inline display.
+# Keys are the domain strings looked up by _domain_tag(); the color is a Rich
+# style name used as the markup tag around the display name.
+DOMAIN_STYLE = {
+    "system":   ("System",   "bright_cyan"),
+    "research": ("Research", "bright_magenta"),
+    "data":     ("Data",     "bright_blue"),
+    "frontend": ("Frontend", "bright_yellow"),
+    "backend":  ("Backend",  "bright_green"),
+    "training": ("Training", "red"),
+    "hardware": ("Hardware", "white"),
+}
+
+
+def _sparkline(values: list[float], width: int = 20) -> str:
+    """Render the trailing ``width`` values as a row of Unicode block glyphs.
+
+    Each value is scaled linearly between the minimum and maximum of the
+    displayed window and mapped to one character of ``SPARK_CHARS``.  A flat
+    window renders entirely as the lowest glyph, and an empty input yields
+    an empty string.
+    """
+    if not values:
+        return ""
+    window = values[-width:]
+    floor = min(window)
+    ceiling = max(window)
+    # Avoid dividing by zero when every value in the window is identical.
+    spread = 1.0 if ceiling == floor else ceiling - floor
+    top = len(SPARK_CHARS) - 1
+    glyphs = []
+    for value in window:
+        level = int((value - floor) / spread * top)
+        glyphs.append(SPARK_CHARS[min(level, top)])
+    return "".join(glyphs)
+
+
+def _fmt_dollars(cents: int) -> str:
+    """Format an integer amount of cents as dollars, e.g. ``$1,234.56``.
+
+    Negative amounts put the sign in front of the currency symbol
+    (``-$12.34`` rather than ``$-12.34``), the same placement that
+    ``_fmt_delta`` uses for signed values.
+    """
+    sign = "-" if cents < 0 else ""
+    return f"{sign}${abs(cents) / 100:,.2f}"
+
+
+def _fmt_delta(cents: int) -> str:
+    """Format a change in cents as signed whole dollars, e.g. ``+$1,250``.
+
+    Zero is rendered with a plus sign; the magnitude is rounded to whole
+    dollars by the ``,.0f`` format spec.
+    """
+    if cents < 0:
+        prefix = "-"
+    else:
+        prefix = "+"
+    dollars = abs(cents) / 100
+    return f"{prefix}${dollars:,.0f}"
+
+
+def _domain_tag(domain_str: str) -> str:
+    """Wrap a domain's display label in Rich color markup.
+
+    Known domains use the label and color from ``DOMAIN_STYLE`` (for
+    example ``[bright_cyan]System[/bright_cyan]``).  Unknown domains fall
+    back to their first three characters upper-cased, rendered in white.
+    """
+    style = DOMAIN_STYLE.get(domain_str)
+    if style is None:
+        style = (domain_str[:3].upper(), "white")
+    label, color = style
+    return f"[{color}]{label}[/{color}]"
+
+
+def _mini_bar(pct: float, width: int = 8) -> str:
+    """Return a fixed-width progress bar as Rich markup.
+
+    Args:
+        pct: Completion fraction.  Values outside ``[0, 1]`` are clamped so
+            the bar is always exactly ``width`` characters wide.
+        width: Number of characters in the bar.
+
+    Returns:
+        A bold green full bar when complete, otherwise ``=`` for the filled
+        part (yellow from 50%, red below) followed by dim ``.`` padding.
+    """
+    # Clamp first: a negative fraction would otherwise produce a negative
+    # fill count and pad the bar beyond ``width``.
+    pct = max(0.0, min(pct, 1.0))
+    if pct >= 1.0:
+        return f"[bold green]{'=' * width}[/bold green]"
+    filled = int(pct * width)
+    color = "yellow" if pct >= 0.5 else "red"
+    return f"[{color}]{'=' * filled}[/{color}][dim]{'.' * (width - filled)}[/dim]"
+
+
+@dataclass
+class TaskInfo:
+    """Display-ready summary of one task shown in the Tasks panel."""
+    title: str
+    # Task status string; the tasks panel marks "active" tasks differently
+    # from all others.
+    status: str
+    prestige: int
+    reward_dollars: float
+    # Deadline already formatted for display ("%Y-%m-%d", or "-" when unset).
+    deadline: str
+    domains: list[str]
+    progress: list[tuple[str, float, float]]  # [(domain, completed, required)]
+
+
+@dataclass
+class EmployeeInfo:
+    """Display-ready summary of one employee shown in the Team panel."""
+    name: str
+    salary_dollars: float
+    # The team panel renders only the first three entries of this list.
+    skills: list[tuple[str, float]]  # [(domain, rate)]
+
+
+@dataclass
+class DashboardState:
+    """Mutable snapshot of every value the dashboard renders."""
+    # Run identity shown in the "Model" row.
+    model: str = ""
+    seed: int = 0
+    config_name: str = ""
+    turn: int = 0
+    # Simulation dates, truncated to their first 10 characters for display.
+    sim_date: str = ""
+    horizon_end: str = ""
+    funds_cents: int = 0
+    # Change in funds since the previous dashboard update.
+    funds_delta_cents: int = 0
+    # One entry per update, in dollars; feeds the funds sparkline.
+    funds_history: list[float] = field(default_factory=list)
+    # funds / monthly payroll; float("inf") when there is no payroll.
+    runway_months: float = 0.0
+    active_tasks: int = 0
+    planned_tasks: int = 0
+    employee_count: int = 0
+    monthly_payroll_cents: int = 0
+    api_cost_usd: float = 0.0
+    # Seconds between the turn-start mark and the update for that turn.
+    turn_time_sec: float = 0.0
+    last_action: str = ""
+    # Status line; contains Rich markup.
+    status: str = ""
+    # Seconds since the dashboard was started.
+    elapsed_sec: float = 0.0
+    tasks_detail: list[TaskInfo] = field(default_factory=list)
+    employees_detail: list[EmployeeInfo] = field(default_factory=list)
+    completed_count: int = 0
+    failed_count: int = 0
+
+
+def _query_detailed_snapshot(db_factory, company_id) -> dict[str, Any]:
+    """Query rich task/employee details from the DB for dashboard display.
+
+    Args:
+        db_factory: Callable returning a context manager that yields a DB
+            session (it is used as ``with db_factory() as db``).
+        company_id: Company whose tasks and employees are queried.
+
+    Returns:
+        A dict with the keys ``tasks_detail`` (list of ``TaskInfo`` for
+        active then planned tasks), ``employees_detail`` (list of
+        ``EmployeeInfo``), ``completed_count`` and ``failed_count``.
+    """
+    # Imported lazily, inside the function, rather than at module import time.
+    from ..db.models.task import Task, TaskStatus, TaskRequirement
+    from ..db.models.employee import Employee, EmployeeSkillRate
+
+    with db_factory() as db:
+        tasks_detail = []
+        # Active tasks are collected first so they appear before planned ones.
+        for status in (TaskStatus.ACTIVE, TaskStatus.PLANNED):
+            tasks = db.query(Task).filter(
+                Task.company_id == company_id,
+                Task.status == status,
+            ).all()
+            for t in tasks:
+                # One extra query per task to fetch its per-domain requirements.
+                reqs = db.query(TaskRequirement).filter(
+                    TaskRequirement.task_id == t.id,
+                ).all()
+                domains = [r.domain.value for r in reqs]
+                progress = [
+                    (r.domain.value, float(r.completed_qty), float(r.required_qty))
+                    for r in reqs
+                ]
+                deadline_str = t.deadline.strftime("%Y-%m-%d") if t.deadline else "-"
+                tasks_detail.append(TaskInfo(
+                    title=t.title,
+                    status=status.value,
+                    prestige=t.required_prestige,
+                    reward_dollars=t.reward_funds_cents / 100.0,
+                    deadline=deadline_str,
+                    domains=domains,
+                    progress=progress,
+                ))
+
+        # Finished tasks are only counted, not listed; "or 0" covers a None scalar.
+        from sqlalchemy import func
+        completed_count = db.query(func.count(Task.id)).filter(
+            Task.company_id == company_id,
+            Task.status == TaskStatus.COMPLETED_SUCCESS,
+        ).scalar() or 0
+        failed_count = db.query(func.count(Task.id)).filter(
+            Task.company_id == company_id,
+            Task.status == TaskStatus.COMPLETED_FAIL,
+        ).scalar() or 0
+
+        employees_detail = []
+        employees = db.query(Employee).filter(
+            Employee.company_id == company_id,
+        ).all()
+        for emp in employees:
+            # One extra query per employee to fetch their skill rates.
+            skills = db.query(EmployeeSkillRate).filter(
+                EmployeeSkillRate.employee_id == emp.id,
+            ).all()
+            # Highest rate first, so consumers can take the top-N by slicing.
+            skill_list = [
+                (s.domain.value, float(s.rate_domain_per_hour))
+                for s in sorted(skills, key=lambda s: float(s.rate_domain_per_hour), reverse=True)
+            ]
+            employees_detail.append(EmployeeInfo(
+                name=emp.name,
+                salary_dollars=emp.salary_cents / 100.0,
+                skills=skill_list,
+            ))
+
+    # Only plain dataclasses and ints leave the session scope.
+    return {
+        "tasks_detail": tasks_detail,
+        "employees_detail": employees_detail,
+        "completed_count": completed_count,
+        "failed_count": failed_count,
+    }
+
+
+class BenchmarkDashboard:
+    """Rich Live dashboard for benchmark progress.
+
+    Lifecycle: ``start()`` opens the live display, ``mark_turn_start()`` and
+    ``update()`` are called around each agent turn, ``stop()`` tears the
+    display down and ``print_final_summary()`` prints a closing panel.
+
+    While the display is running ``sys.stderr`` is pointed at ``os.devnull``
+    so stray writes cannot corrupt the in-place rendering; ``stop()``
+    restores it.
+    """
+
+    def __init__(self, model: str, seed: int, config_name: str,
+                 db_factory=None, company_id=None):
+        """Create a dashboard; nothing is drawn until ``start()`` is called.
+
+        Args:
+            model: Model identifier shown in the header (only the part after
+                the last ``/`` is displayed).
+            seed: Run seed shown in the header.
+            config_name: Config name shown in the header.
+            db_factory: Optional session factory used to query task and
+                employee details on every update.
+            company_id: Company to query; details are skipped unless both
+                ``db_factory`` and ``company_id`` are given.
+        """
+        self._console = Console()
+        self._live: Live | None = None
+        self._state = DashboardState(model=model, seed=seed, config_name=config_name)
+        self._start_time = time.monotonic()
+        self._turn_start_time = 0.0
+        self._prev_funds_cents = 0
+        self._db_factory = db_factory
+        self._company_id = company_id
+        self._stderr_backup = None
+        self._devnull = None
+
+    def start(self) -> None:
+        """Silence stderr and start the live display.
+
+        Calling ``start()`` while the display is already running is a no-op;
+        a second redirect would otherwise overwrite the saved stderr with
+        the devnull handle and make it impossible to restore.
+        """
+        import sys
+        if self._live is not None:
+            return
+        self._start_time = time.monotonic()
+        self._state.status = "[dim]Starting...[/dim]"
+        self._stderr_backup = sys.stderr
+        self._devnull = open(os.devnull, "w")
+        sys.stderr = self._devnull
+        try:
+            self._live = Live(
+                self._render(),
+                console=self._console,
+                refresh_per_second=2,
+                screen=True,
+            )
+            self._live.start()
+        except BaseException:
+            # Do not leave stderr silenced (or the devnull handle open) when
+            # the display could not be brought up.
+            self.stop()
+            raise
+
+    def stop(self) -> None:
+        """Stop the live display and restore stderr; safe to call repeatedly."""
+        import sys
+        if self._live is not None:
+            self._live.stop()
+            self._live = None
+        if self._stderr_backup is not None:
+            sys.stderr = self._stderr_backup
+            self._stderr_backup = None
+        if self._devnull is not None:
+            self._devnull.close()
+            self._devnull = None
+
+    def mark_turn_start(self, turn_num: int) -> None:
+        """Record the start time of a turn and show a waiting status."""
+        self._turn_start_time = time.monotonic()
+        self._state.turn = turn_num
+        self._state.status = f"[yellow]>> Turn {turn_num}: waiting for LLM...[/yellow]"
+        self._state.elapsed_sec = time.monotonic() - self._start_time
+        self._refresh()
+
+    def update(self, snapshot: dict[str, Any], run_state: Any, commands: list[str] | None = None) -> None:
+        """Fold the result of a finished turn into the state and redraw.
+
+        Args:
+            snapshot: Mapping read for ``sim_time``, ``horizon_end``,
+                ``funds_cents``, ``active_tasks``, ``planned_tasks``,
+                ``employee_count`` and ``monthly_payroll_cents``; missing
+                keys fall back to empty/zero values.
+            run_state: Object providing ``turn_count``, ``total_cost_usd``,
+                ``terminal`` and ``terminal_reason``.
+            commands: Commands executed this turn; the first one (cut at
+                ``" -> "`` if present) is shown as the last action.
+        """
+        now = time.monotonic()
+        s = self._state
+
+        s.turn = run_state.turn_count
+        # "or ''" also covers keys that are present but None.
+        s.sim_date = (snapshot.get("sim_time") or "")[:10]
+        s.horizon_end = (snapshot.get("horizon_end") or "")[:10]
+        s.funds_cents = snapshot.get("funds_cents", 0)
+        # There is no previous balance on the first update, so report no
+        # change instead of the whole balance as a "gain".
+        is_first_update = not s.funds_history
+        s.funds_delta_cents = 0 if is_first_update else s.funds_cents - self._prev_funds_cents
+        self._prev_funds_cents = s.funds_cents
+        s.funds_history.append(s.funds_cents / 100.0)
+        s.active_tasks = snapshot.get("active_tasks", 0)
+        s.planned_tasks = snapshot.get("planned_tasks", 0)
+        s.employee_count = snapshot.get("employee_count", 0)
+        s.monthly_payroll_cents = snapshot.get("monthly_payroll_cents", 0)
+        s.api_cost_usd = run_state.total_cost_usd
+        s.turn_time_sec = now - self._turn_start_time if self._turn_start_time else 0.0
+        s.elapsed_sec = now - self._start_time
+
+        if s.monthly_payroll_cents > 0:
+            s.runway_months = s.funds_cents / s.monthly_payroll_cents
+        else:
+            s.runway_months = float("inf")
+
+        if commands:
+            first = commands[0].split(" -> ")[0] if " -> " in commands[0] else commands[0]
+            if len(commands) > 1:
+                s.last_action = f"{first} (+{len(commands)-1} more)"
+            else:
+                s.last_action = first
+        else:
+            s.last_action = "(no commands)"
+
+        if run_state.terminal:
+            reason = run_state.terminal_reason.value if run_state.terminal_reason else "unknown"
+            s.status = f"[bold green]DONE: {reason}[/bold green]"
+        else:
+            s.status = f"[green]Turn {s.turn} complete[/green]"
+
+        if self._db_factory is not None and self._company_id is not None:
+            try:
+                detail = _query_detailed_snapshot(self._db_factory, self._company_id)
+            except Exception:
+                # Best effort: keep showing the previous details, but leave
+                # a trace in the log rather than failing silently.
+                import logging
+                logging.getLogger(__name__).warning(
+                    "Dashboard detail query failed", exc_info=True,
+                )
+            else:
+                s.tasks_detail = detail["tasks_detail"]
+                s.employees_detail = detail["employees_detail"]
+                s.completed_count = detail["completed_count"]
+                s.failed_count = detail["failed_count"]
+
+        self._refresh()
+
+    def print_final_summary(self, run_state: Any) -> None:
+        """Print a closing summary panel built from the last known state.
+
+        The border is green for ``horizon_end``, red for ``bankruptcy`` and
+        yellow for any other outcome.
+        """
+        s = self._state
+        elapsed_m, elapsed_s = divmod(int(s.elapsed_sec), 60)
+        elapsed_h, elapsed_m = divmod(elapsed_m, 60)
+
+        table = Table(show_header=False, box=None, padding=(0, 2))
+        table.add_column(style="bold cyan", width=14)
+        table.add_column()
+
+        table.add_row("Turns", str(s.turn))
+        table.add_row("Final Funds", _fmt_dollars(s.funds_cents))
+        table.add_row("Tasks", f"[green]{s.completed_count} done[/green] / [red]{s.failed_count} failed[/red]")
+        table.add_row("API Cost", f"${s.api_cost_usd:.4f}")
+        table.add_row("Elapsed", f"{elapsed_h}h {elapsed_m:02d}m {elapsed_s:02d}s")
+        reason = run_state.terminal_reason.value if run_state.terminal_reason else "max_turns"
+        table.add_row("Outcome", reason)
+
+        panel = Panel(
+            table,
+            title="[bold]YC-Bench Complete[/bold]",
+            border_style="green" if reason == "horizon_end" else "red" if reason == "bankruptcy" else "yellow",
+        )
+        self._console.print(panel)
+
+    def _refresh(self) -> None:
+        """Redraw the live display, if it is running."""
+        if self._live is not None:
+            self._live.update(self._render())
+
+    # ------------------------------------------------------------------
+    # Render helpers
+    # ------------------------------------------------------------------
+
+    def _render_stats_panel(self) -> Panel:
+        """Build the headline panel: run identity, funds, runway, counters."""
+        s = self._state
+        elapsed_m, elapsed_s = divmod(int(s.elapsed_sec), 60)
+        elapsed_h, elapsed_m = divmod(elapsed_m, 60)
+        short_model = s.model.rsplit("/", 1)[-1]
+
+        table = Table(show_header=False, box=None, padding=(0, 1))
+        table.add_column(style="bold cyan", width=12)
+        table.add_column(overflow="ellipsis", no_wrap=True)
+
+        table.add_row("Model", f"[bold]{short_model}[/bold]  seed={s.seed}  {s.config_name}")
+        table.add_row("Turn", f"[bold white]{s.turn}[/bold white]")
+        table.add_row("Sim Date", f"{s.sim_date} [dim]->[/dim] {s.horizon_end}" if s.sim_date else "[dim]--[/dim]")
+        table.add_row("Elapsed", f"{elapsed_h}h {elapsed_m:02d}m {elapsed_s:02d}s")
+
+        # Funds with colored sparkline
+        spark = _sparkline(s.funds_history)
+        delta_color = "green" if s.funds_delta_cents >= 0 else "red"
+        if s.turn > 0:
+            funds_str = f"[bold]{_fmt_dollars(s.funds_cents)}[/bold] [{delta_color}]{_fmt_delta(s.funds_delta_cents)}[/{delta_color}] [{delta_color}]{spark}[/{delta_color}]"
+        else:
+            funds_str = "[dim]--[/dim]"
+        table.add_row("Funds", funds_str)
+
+        # Runway with urgency coloring
+        if s.runway_months == float("inf"):
+            runway_str = "[green]unlimited[/green]"
+        elif s.runway_months < 2:
+            runway_str = f"[bold red blink]{s.runway_months:.1f}mo CRITICAL[/bold red blink]"
+        elif s.runway_months < 4:
+            runway_str = f"[bold yellow]{s.runway_months:.1f}mo LOW[/bold yellow]"
+        else:
+            runway_str = f"[green]{s.runway_months:.1f}mo[/green]"
+        table.add_row("Runway", runway_str)
+
+        # Task scoreboard
+        task_parts = f"{s.active_tasks} active / {s.planned_tasks} queued"
+        if s.completed_count or s.failed_count:
+            task_parts += f"  [green]{s.completed_count} done[/green] [red]{s.failed_count} fail[/red]"
+        table.add_row("Tasks", task_parts)
+
+        table.add_row("Team", f"{s.employee_count} people  {_fmt_dollars(s.monthly_payroll_cents)}/mo" if s.monthly_payroll_cents else str(s.employee_count))
+        table.add_row("Cost", f"${s.api_cost_usd:.4f}  ({s.turn_time_sec:.1f}s/turn)" if s.turn_time_sec else f"${s.api_cost_usd:.4f}")
+        table.add_row("Action", s.last_action or "[dim]--[/dim]")
+        table.add_row("Status", s.status)
+
+        return Panel(table, title="[bold]YC-Bench[/bold]", border_style="blue")
+
+    def _render_tasks_panel(self) -> Panel:
+        """Build the tasks panel: up to six tasks with per-domain progress."""
+        s = self._state
+        max_rows = 6
+
+        if not s.tasks_detail:
+            return Panel(
+                "[dim]No active or planned tasks yet...[/dim]",
+                title="[bold]Tasks[/bold]",
+                border_style="yellow",
+            )
+
+        table = Table(box=None, padding=(0, 1), show_edge=False)
+        table.add_column("", width=2)                                           # status marker
+        table.add_column("Task", style="bold white", no_wrap=True, max_width=20)
+        table.add_column("$$$", width=8, justify="right", no_wrap=True)         # reward
+        table.add_column("Due", width=10, no_wrap=True)                         # deadline
+        table.add_column("Progress", no_wrap=True, overflow="ellipsis", ratio=1)
+
+        for t in s.tasks_detail[:max_rows]:
+            if t.status == "active":
+                marker = "[bold green]>>[/bold green]"
+            else:
+                marker = "[dim]..[/dim]"
+
+            # Reward colored by size
+            if t.reward_dollars >= 50000:
+                reward = f"[bold green]${t.reward_dollars:,.0f}[/bold green]"
+            elif t.reward_dollars >= 20000:
+                reward = f"[green]${t.reward_dollars:,.0f}[/green]"
+            else:
+                reward = f"${t.reward_dollars:,.0f}"
+
+            # Domain progress with colored bars
+            prog_parts = []
+            for domain, completed, required in t.progress:
+                # A requirement of zero is shown as 0% rather than dividing by zero.
+                pct = completed / required if required > 0 else 0
+                bar = _mini_bar(pct, width=6)
+                tag = _domain_tag(domain)
+                prog_parts.append(f"{tag} {bar}")
+            progress_str = " ".join(prog_parts)
+
+            table.add_row(marker, t.title[:20], reward, t.deadline, progress_str)
+
+        remaining = len(s.tasks_detail) - max_rows
+        if remaining > 0:
+            table.add_row("", f"[dim]+{remaining} more[/dim]", "", "", "")
+
+        return Panel(table, title="[bold]Tasks[/bold]", border_style="yellow")
+
+    def _render_team_panel(self) -> Panel:
+        """Build the team panel: each employee's pay and top three skills."""
+        s = self._state
+
+        if not s.employees_detail:
+            return Panel("[dim]No employees hired yet...[/dim]", title="[bold]Team[/bold]", border_style="magenta")
+
+        table = Table(box=None, padding=(0, 1), show_edge=False)
+        table.add_column("Name", style="bold white", width=14, no_wrap=True)
+        table.add_column("Pay", width=8, justify="right", no_wrap=True)
+        table.add_column("Skills", no_wrap=True, overflow="ellipsis", ratio=1)
+
+        for emp in s.employees_detail:
+            # Salary colored by cost
+            if emp.salary_dollars >= 10000:
+                pay = f"[bold red]${emp.salary_dollars:,.0f}[/bold red]"
+            elif emp.salary_dollars >= 5000:
+                pay = f"[yellow]${emp.salary_dollars:,.0f}[/yellow]"
+            else:
+                pay = f"[green]${emp.salary_dollars:,.0f}[/green]"
+
+            # Skill bars — top 3
+            skill_parts = []
+            for d, r in emp.skills[:3]:
+                tag = _domain_tag(d)
+                # Rate bar: scale 0-15 to a mini bar
+                bar_pct = min(r / 15.0, 1.0)
+                bar = _mini_bar(bar_pct, width=4)
+                skill_parts.append(f"{tag}{bar}")
+            skills_str = " ".join(skill_parts)
+
+            table.add_row(emp.name[:14], pay, skills_str)
+
+        return Panel(table, title="[bold]Team[/bold]", border_style="magenta")
+
+    def _render(self) -> Group:
+        """Stack the stats, tasks and team panels into one renderable."""
+        return Group(
+            self._render_stats_panel(),
+            self._render_tasks_panel(),
+            self._render_team_panel(),
+        )
+
+
+# Names exported by a star-import of this module.
+__all__ = ["BenchmarkDashboard", "DashboardState"]
--- a/src/yc_bench/runner/main.py
+++ b/src/yc_bench/runner/main.py
@ -4,6 +4,7 @@ from __future__ import annotations
 import json
 import logging
 import os
+import sys
 from contextlib import contextmanager
 from datetime import datetime, timezone
 from pathlib import Path
@ -18,6 +19,9 @@ from .args import parse_run_args

 logger = logging.getLogger(__name__)

+# Loggers that produce noisy debug output during LLM calls.
+# Logger names are case-sensitive, so both "litellm" and "LiteLLM" are listed.
+_NOISY_LOGGERS = ("litellm", "httpx", "httpcore", "openai", "LiteLLM")
+

 def _parse_date(date_str: str) -> datetime:
    """Accept ISO (2025-01-01) or legacy MM/DD/YYYY format."""
@ -119,6 +123,33 @@ def _init_simulation(db_factory, args, experiment_cfg, horizon_years):
 # Main
 # ---------------------------------------------------------------------------

+def _redirect_all_logging_to_file(log_file: Path) -> None:
+    """Redirect ALL logging from the console to a file.
+
+    When the Rich Live dashboard is active, any output to stdout/stderr
+    breaks the in-place rendering, causing stacked panels. This removes
+    the root logger's console handlers and replaces them with a file handler.
+
+    Args:
+        log_file: Destination file, opened in append mode. Missing parent
+            directories are created.
+    """
+    # parents=True so a nested log path works, not just a single new directory.
+    log_file.parent.mkdir(parents=True, exist_ok=True)
+    # Explicit UTF-8: log records may contain non-ASCII text, and an encoding
+    # failure would be reported on stderr, i.e. onto the dashboard.
+    file_handler = logging.FileHandler(str(log_file), mode="a", encoding="utf-8")
+    file_handler.setFormatter(logging.Formatter(
+        "[%(asctime)s] %(name)s %(levelname)s: %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    ))
+
+    root = logging.getLogger()
+    noisy = [logging.getLogger(name) for name in _NOISY_LOGGERS]
+
+    # Replace every existing handler on the root and noisy loggers with the
+    # file handler, closing the old ones instead of just dropping them.
+    for lg in (root, *noisy):
+        for old in list(lg.handlers):
+            lg.removeHandler(old)
+            old.close()
+        lg.addHandler(file_handler)
+
+    # Also ensure noisy loggers don't propagate (belt and suspenders); they
+    # write to the file through their own handler instead.
+    for lg in noisy:
+        lg.propagate = False
+
+
 def run_benchmark(args):
    """Run a full benchmark: migrate, seed, loop until terminal."""
    logging.basicConfig(
@ -139,6 +170,15 @@ def run_benchmark(args):
    # --horizon-years CLI flag overrides config; fall back to sim.horizon_years from config
    horizon_years = args.horizon_years if args.horizon_years is not None else experiment_cfg.sim.horizon_years

+    # Decide whether to use the live dashboard
+    use_live = sys.stdout.isatty() and not getattr(args, "no_live", False)
+
+    # When using the live dashboard, redirect all logging to file immediately
+    # so no console output interferes with Rich Live rendering.
+    if use_live:
+        log_file = Path("logs") / "debug.log"
+        _redirect_all_logging_to_file(log_file)
+
    logger.info(
        "YC-Bench starting: experiment=%s model=%s seed=%d horizon=%dy",
        experiment_cfg.name, args.model, args.seed, horizon_years,
@ -187,19 +227,54 @@ def run_benchmark(args):
        horizon_years=horizon_years,
    )

-    # 6. Run agent loop
-    loop_cfg = experiment_cfg.loop
-    final_state = run_agent_loop(
-        runtime=runtime,
-        db_factory=db_factory,
-        company_id=company_id,
-        run_state=run_state,
-        command_executor=run_command,
-        auto_advance_after_turns=loop_cfg.auto_advance_after_turns,
-        max_turns=loop_cfg.max_turns,
-    )
+    # 6. Set up live dashboard (or not)
+    dashboard = None
+    on_turn_start = None
+    on_turn = None

-    # 7. Save full rollout (with transcript) and print summary
+    if use_live:
+        from .dashboard import BenchmarkDashboard
+
+        dashboard = BenchmarkDashboard(
+            model=args.model,
+            seed=args.seed,
+            config_name=args.config_name,
+            db_factory=db_factory,
+            company_id=company_id,
+        )
+
+        def on_turn_start(turn_num):
+            dashboard.mark_turn_start(turn_num)
+
+        def on_turn(snapshot, rs, commands):
+            dashboard.update(snapshot, rs, commands)
+
+    # 7. Run agent loop
+    loop_cfg = experiment_cfg.loop
+    try:
+        if dashboard is not None:
+            dashboard.start()
+
+        final_state = run_agent_loop(
+            runtime=runtime,
+            db_factory=db_factory,
+            company_id=company_id,
+            run_state=run_state,
+            command_executor=run_command,
+            auto_advance_after_turns=loop_cfg.auto_advance_after_turns,
+            max_turns=loop_cfg.max_turns,
+            on_turn_start=on_turn_start,
+            on_turn=on_turn,
+        )
+    finally:
+        if dashboard is not None:
+            dashboard.stop()
+
+    # 8. Print final summary
+    if dashboard is not None:
+        dashboard.print_final_summary(final_state)
+
+    # 9. Save full rollout (with transcript) and print summary
    rollout = final_state.full_rollout()
    summary = final_state.summary()
    logger.info("Run complete: %s", json.dumps(summary, indent=2))