"""YC-Bench prestige radar chart — final prestige per domain, Collinear AI branding.""" import sqlite3 from pathlib import Path from math import pi import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np ROOT = Path(__file__).parent.parent INITIAL_FUNDS_CENTS = 25_000_000 # ── Collinear brand palette ────────────────────────────────────────────────── NAVY = "#13234D" ORANGE = "#F26125" BLUE = "#4D65FF" BG_COLOR = "#FAFBFD" GRID_CLR = "#E8ECF2" TEXT_CLR = "#2A2F3D" MUTED = "#6B7694" CARD_BG = "#FFFFFF" MODELS = { "sonnet": { "slug": "anthropic_claude-sonnet-4-6", "label": "Sonnet 4.6", "color": BLUE, }, "gemini": { "slug": "gemini_gemini-3-flash-preview", "label": "Gemini 3 Flash", "color": ORANGE, }, "gpt52": { "slug": "openai_gpt-5.2", "label": "GPT-5.2", "color": "#22C55E", }, "greedy": { "slug": "greedy_bot", "label": "Human Devised Rule", "color": NAVY, }, } BOT_KEYS = {"greedy"} CONFIGS = ["medium", "hard", "nightmare"] SEEDS = [1, 2, 3] DIFF_COLORS = {"medium": BLUE, "hard": ORANGE, "nightmare": "#DC2626"} DOMAINS = ["research", "inference", "data_environment", "training"] DOMAIN_LABELS = ["RES", "INF", "DATA/ENV", "TRAIN"] def load_logo_image(height_px=80): """Render the wordmark SVG to a high-res RGBA PIL image.""" import os, ctypes.util if ctypes.util.find_library("cairo") is None: brew_lib = "/opt/homebrew/lib" if Path(brew_lib).exists(): os.environ.setdefault("DYLD_LIBRARY_PATH", brew_lib) try: import cairosvg from PIL import Image import io p = ROOT / "plots" / "collinear_wordmark.svg" if not p.exists(): return None png_data = cairosvg.svg2png(url=str(p), output_height=height_px) return Image.open(io.BytesIO(png_data)).convert("RGBA") except ImportError: return None def load_prestige(db_path): """Load final prestige levels from company_prestige table.""" con = sqlite3.connect(str(db_path)) rows = con.execute( "SELECT domain, prestige_level FROM company_prestige ORDER BY domain" ).fetchall() con.close() if not rows: return None prestige = {row[0]: float(row[1]) for row in rows} # Return values in canonical domain order return [prestige.get(d, 1.0) for d in DOMAINS] def load_all(): runs = [] for config in CONFIGS: for seed in SEEDS: for key, model in MODELS.items(): db_path = ROOT / "db" / f"{config}_{seed}_{model['slug']}.db" if not db_path.exists(): continue values = load_prestige(db_path) if values is None: continue # Check if bankrupt (all prestige stuck at 1.0 = never completed a task) all_base = all(v <= 1.01 for v in values) runs.append({ "config": config, "seed": seed, "model_key": key, "label": model["label"], "color": model["color"], "values": values, "all_base": all_base, "max_prestige": max(values), }) tag = "all-1.0" if all_base else f"max={max(values):.1f}" print(f" {config} seed={seed} {model['label']}: {tag}") return runs def make_plot(runs): fig = plt.figure(figsize=(30, 22), facecolor=BG_COLOR) # ── Header band ────────────────────────────────────────────────────── header_rect = plt.Rectangle((0, 0.90), 1, 0.10, transform=fig.transFigure, facecolor=NAVY, edgecolor="none", zorder=0) fig.patches.append(header_rect) accent_rect = plt.Rectangle((0, 0.895), 1, 0.006, transform=fig.transFigure, facecolor=ORANGE, edgecolor="none", zorder=1) fig.patches.append(accent_rect) fig.text( 0.5, 0.955, "YC-Bench | Prestige Radar | 1-Year Horizon", ha="center", va="center", fontsize=46, fontweight="700", color="white", fontfamily="Helvetica Neue", zorder=2, ) # ── Common legend in header ────────────────────────────────────────── legend_items = [ ("Sonnet 4.6", BLUE, "-", 4.0, 0.95), ("Gemini 3 Flash", ORANGE, "-", 4.0, 0.95), ("GPT-5.2", "#22C55E", "-", 4.0, 0.95), ("Human Devised Rule", NAVY, "--", 3.5, 0.75), ] legend_handles = [] for lbl, clr, ls, lw, alpha in legend_items: line = plt.Line2D([0], [0], color=clr, linewidth=lw, linestyle=ls, alpha=alpha) legend_handles.append(line) legend_labels = [item[0] for item in legend_items] fig.legend( legend_handles, legend_labels, loc="center", bbox_to_anchor=(0.53, 0.855), ncol=4, fontsize=22, frameon=False, labelcolor=TEXT_CLR, handlelength=3.5, handletextpad=1.0, columnspacing=3.0, ) logo_img = load_logo_image(height_px=120) # ── Radar setup ────────────────────────────────────────────────────── N = len(DOMAINS) angles = [n / float(N) * 2 * pi for n in range(N)] angles += angles[:1] # close the polygon # Create 3x3 grid of polar subplots for row, config in enumerate(CONFIGS): for col, seed in enumerate(SEEDS): ax = fig.add_subplot(3, 3, row * 3 + col + 1, polar=True) ax.set_facecolor(CARD_BG) # Configure the radar grid ax.set_theta_offset(pi / 2) # Start from top ax.set_theta_direction(-1) # Clockwise ax.set_rlabel_position(0) # Domain labels ax.set_xticks(angles[:-1]) ax.set_xticklabels(DOMAIN_LABELS, fontsize=16, color=TEXT_CLR, fontweight="500") # Radial grid (prestige 1-10) ax.set_ylim(0, 10) ax.set_yticks([2, 4, 6, 8, 10]) ax.set_yticklabels(["2", "4", "6", "8", "10"], fontsize=11, color=MUTED) # Grid styling ax.spines["polar"].set_color(GRID_CLR) ax.grid(color=GRID_CLR, linewidth=0.8, alpha=0.8) ax.tick_params(axis="x", pad=14) # Plot each model cell_runs = [r for r in runs if r["config"] == config and r["seed"] == seed] # Sort: bots first (background), then by max prestige desc def sort_key(r): if r["model_key"] in BOT_KEYS: return (0, 0) return (1, -r["max_prestige"]) cell_runs.sort(key=sort_key) for r in cell_runs: values = r["values"] + r["values"][:1] # close polygon is_bot = r["model_key"] in BOT_KEYS if r["all_base"]: alpha, lw, ls = 0.3, 2.0, "-" if not is_bot else "--" fill_alpha = 0.05 elif is_bot: alpha, lw, ls = 0.75, 3.0, "--" fill_alpha = 0.08 else: alpha, lw, ls = 0.95, 3.0, "-" fill_alpha = 0.12 ax.plot(angles, values, color=r["color"], linewidth=lw, alpha=alpha, linestyle=ls, zorder=2 if is_bot else 3) ax.fill(angles, values, color=r["color"], alpha=fill_alpha, zorder=1 if is_bot else 2) # ── Layout and labels ──────────────────────────────────────────────── plt.subplots_adjust( left=0.08, right=0.98, top=0.79, bottom=0.05, hspace=0.35, wspace=0.28, ) # Row labels (config names) row_y_positions = [0.70, 0.42, 0.14] # approximate centers of each row for row, config in enumerate(CONFIGS): fig.text( 0.025, row_y_positions[row], config.upper(), fontsize=23, fontweight="800", color=DIFF_COLORS[config], ha="center", va="center", rotation=90, ) # Seed column headers col_centers = [0.08 + (0.98 - 0.08) * (i + 0.5) / 3 for i in range(3)] for i, seed in enumerate(SEEDS): fig.text( col_centers[i], 0.80, f"Seed {seed}", ha="center", va="bottom", fontsize=26, fontweight="600", color=TEXT_CLR, ) # Footer fig.text( 0.5, 0.01, "collinear.ai | YC-Bench: Long-Horizon Deterministic Benchmark for LLM Agents", ha="center", va="bottom", fontsize=18, fontweight="400", color=MUTED, fontstyle="italic", ) out = ROOT / "plots" / "prestige_radar.png" out.parent.mkdir(parents=True, exist_ok=True) dpi = 150 plt.savefig(out, dpi=dpi, facecolor=BG_COLOR, pad_inches=0) # Composite logo if logo_img is not None: from PIL import Image plot_img = Image.open(out).convert("RGBA") img_w, img_h = plot_img.size header_h = int(img_h * 0.10) target_h = int(header_h * 0.65) scale = target_h / logo_img.size[1] logo = logo_img.resize((int(logo_img.size[0] * scale), target_h), Image.LANCZOS) y_offset = (header_h - target_h) // 2 x_offset = 70 plot_img.paste(logo, (x_offset, y_offset), logo) plot_img.save(out) print(f"\nSaved: {out}") if __name__ == "__main__": print("Loading prestige data...") runs = load_all() if not runs: print("No data found.") else: make_plot(runs)