Move to subfolder

2026-04-23 16:54:56 +00:00 · 2025-05-21 16:18:01 -07:00 · 2025-05-21 16:18:01 -07:00 · 7eae51cc5c
commit 7eae51cc5c
parent a88e3afddf
23 changed files with 0 additions and 0 deletions
--- a/environments/hack0/DeepSacrifice/server/agents/attacker_agent.ts
+++ b/environments/hack0/DeepSacrifice/server/agents/attacker_agent.ts
@ -0,0 +1,46 @@
+import { Chess } from "chess.js";
+
+/**
+ * Returns a uniformly random element of a non-empty list.
+ */
+function pickRandomMove<T>(candidates: readonly T[]): T {
+  return candidates[Math.floor(Math.random() * candidates.length)];
+}
+
+/**
+ * Converts a verbose chess.js move into the plain move object handed to
+ * callers. The promotion piece is only attached when the move is a promotion,
+ * so ordinary moves keep the original `{ from, to }` shape.
+ */
+function toMoveInput(m: {
+  from: string;
+  to: string;
+  promotion?: string;
+}): { from: string; to: string; promotion?: string } {
+  return m.promotion
+    ? { from: m.from, to: m.to, promotion: m.promotion }
+    : { from: m.from, to: m.to };
+}
+
+/**
+ * Chooses a move for the side to move in `fen` using a simple "aggressive"
+ * priority: a random capture if any exists, otherwise a random checking
+ * move, otherwise a random legal move.
+ *
+ * @param fen Position to move from, as a FEN string.
+ * @returns The chosen move, or null when the position has no legal moves.
+ */
+export function getAggressiveMove(
+  fen: string,
+): { from: string; to: string; promotion?: string } | null {
+  const chess = new Chess(fen);
+  const moves = chess.moves({ verbose: true });
+  if (moves.length === 0) return null;
+
+  // Prefer captures ("c" = capture, "e" = en passant capture).
+  const captures = moves.filter(
+    (m) => m.flags.includes("c") || m.flags.includes("e"),
+  );
+  if (captures.length > 0) {
+    return toMoveInput(pickRandomMove(captures));
+  }
+
+  // Prefer checks: play each move on the scratch board, test, then take it back.
+  const checks = moves.filter((m) => {
+    // Replaying by SAN keeps the promotion piece; a bare { from, to } is
+    // rejected by chess.js for promotion moves.
+    chess.move(m.san);
+    const isCheck = chess.inCheck();
+    chess.undo();
+    return isCheck;
+  });
+  if (checks.length > 0) {
+    return toMoveInput(pickRandomMove(checks));
+  }
+
+  // Otherwise, pick random
+  return toMoveInput(pickRandomMove(moves));
+}
+
+/**
+ * Minimal agent wrapper: move selection is delegated to getAggressiveMove and
+ * the learning hook is still a placeholder.
+ */
+export class AttackerAgent {
+  /**
+   * Selects the agent's move for the position described by `fen`.
+   * Returns whatever getAggressiveMove returns, including null.
+   */
+  getMove(fen: string): { from: string; to: string } | null {
+    const chosen = getAggressiveMove(fen);
+    return chosen;
+  }
+
+  /**
+   * Learning hook. Currently a no-op: both arguments are ignored.
+   */
+  learnFromGame(gameData: any, llmFeedback: any): void {
+    // TODO: Implement incremental learning from LLM feedback
+    return;
+  }
+}
--- a/environments/hack0/DeepSacrifice/server/api/serve.ts
+++ b/environments/hack0/DeepSacrifice/server/api/serve.ts
@ -0,0 +1,149 @@
+import { Elysia } from "elysia";
+import { getAggressiveMove } from "../agents/attacker_agent";
+import { ChessEnv } from "../env/chess_env";
+import { scoreAndJustifyGame } from "../llm/llm_feedback";
+import { computeReward } from "../reward/reward_fn";
+
+// In-memory server state shared by every request handled by this module.
+const env = new ChessEnv();
+// Finished games; each entry is the list of move records of one game.
+const games: any[] = [];
+// Move records of the game currently being played.
+let currentGame: any[] = [];
+
+/**
+ * Archives the game in progress into `games`, clears `currentGame`, and
+ * returns a copy of the archived move records for the HTTP response.
+ */
+function finishCurrentGame(): any[] {
+  games.push([...currentGame]);
+  const moves = [...currentGame];
+  currentGame = [];
+  return moves;
+}
+
+const app = new Elysia()
+  .get("/ping", () => "pong")
+  // Up to the five most recently finished games.
+  .get("/api/games/latest", () => games.slice(-5))
+  // Applies the user's move and, if the game continues, one agent reply.
+  .post("/api/move", async ({ body }) => {
+    const { from, to, san, color } = (body ?? {}) as {
+      from?: string;
+      to?: string;
+      san?: string;
+      color?: string;
+    };
+    // Reject malformed requests with the same { error } shape used below
+    // instead of letting undefined squares reach the environment.
+    if (typeof from !== "string" || typeof to !== "string") {
+      return { error: "Missing or invalid from/to squares" };
+    }
+    // The second FEN field is the side to move ("w" or "b").
+    const currentFen = env.getFEN();
+    const currentColor = currentFen.split(" ")[1] === "b" ? "black" : "white";
+    if (color && color !== currentColor) {
+      return { error: `It's not ${color}'s turn.` };
+    }
+    // User move
+    const { fen: userFen, done: userDone } = env.step({ from, to });
+    const userMoveData = {
+      fen: userFen,
+      move: { from, to, san },
+      reward: null, // Placeholder, to be filled after scoring
+      llmFeedback: { score: null, justification: null },
+    };
+    currentGame.push(userMoveData);
+    // If game is over after user move, return
+    if (userDone) {
+      return {
+        moves: finishCurrentGame(),
+        done: true,
+      };
+    }
+    // Agent reply for the side to move in the position after the user's move.
+    const agentMove = getAggressiveMove(userFen);
+    if (!agentMove) {
+      return {
+        moves: [userMoveData],
+        done: false,
+      };
+    }
+    const { fen: agentFen, done: agentIsDone } = env.step(agentMove);
+    const agentMoveData = {
+      fen: agentFen,
+      move: agentMove,
+      reward: null, // Placeholder
+      llmFeedback: { score: null, justification: null },
+    };
+    currentGame.push(agentMoveData);
+    if (agentIsDone) {
+      return {
+        moves: finishCurrentGame(),
+        done: true,
+      };
+    }
+    // Game continues: report only the two moves made by this request.
+    return {
+      moves: [userMoveData, agentMoveData],
+      done: false,
+    };
+  })
+  // .get("/evaluate", async () => {
+  //   const fen = env.getFEN();
+  //   const evalResult = await evaluatePosition(fen);
+  //   return evalResult;
+  // })
+  .post("/api/train/start", () => ({ started: true }))
+  .get("/api/agent/status", () => ({ gamesPlayed: games.length, avgReward: 0 }))
+  // Resets the environment and discards the moves of the game in progress.
+  .post("/api/reset", () => {
+    env.reset();
+    currentGame.length = 0;
+    return { fen: env.getFEN() };
+  })
+  // Scores the agent's moves of a submitted game with LLM feedback.
+  .post("/api/game/llm_feedback", async ({ body }) => {
+    // Expects: { moves: [{ fen, move: { from, to, san } }] }
+    const { moves } = (body ?? {}) as {
+      moves?: {
+        fen: string;
+        move: { from: string; to: string; san?: string };
+      }[];
+    };
+    // Every entry must carry a move object; otherwise the SAN lookup below
+    // would throw instead of producing an error response.
+    if (!Array.isArray(moves) || moves.some((m) => !m?.move)) {
+      return { error: "Missing or invalid moves array" };
+    }
+    // Falls back to "from-to" notation when the client sent no SAN.
+    const sanOf = (m: { move: { from: string; to: string; san?: string } }) =>
+      m.move.san || `${m.move.from}-${m.move.to}`;
+    // Only score agent moves (odd indices: the user moves first at index 0).
+    const agentMoves = moves.filter((_, idx) => idx % 2 === 1);
+    const agentFens = agentMoves.map((m) => m.fen);
+    const agentSANs = agentMoves.map(sanOf);
+    let feedbackArr: { score: string; justification: string }[] = [];
+    // Skip the LLM round trip when there is nothing to score.
+    if (agentMoves.length > 0) {
+      try {
+        feedbackArr = await scoreAndJustifyGame(agentFens, agentSANs);
+      } catch (e) {
+        return { error: "LLM feedback failed", details: String(e) };
+      }
+    }
+    const scoredMoves = await Promise.all(
+      moves.map(async (moveData, idx) => {
+        if (idx % 2 === 0) {
+          // User move: leave feedback/reward as null
+          return {
+            ...moveData,
+            reward: null,
+            llmFeedback: { score: null, justification: null },
+          };
+        }
+        // Agent move: the k-th agent move (index 2k + 1) takes the k-th
+        // feedback entry. Indexing, unlike shift(), does not depend on the
+        // order in which these async callbacks happen to run.
+        const feedback = feedbackArr[(idx - 1) / 2] || {
+          score: null,
+          justification: null,
+        };
+        const reward = await computeReward(
+          moveData.fen,
+          sanOf(moveData),
+          feedback.score ?? "",
+          feedback.justification ?? "",
+        );
+        return {
+          ...moveData,
+          reward,
+          llmFeedback: feedback,
+        };
+      }),
+    );
+    return { moves: scoredMoves };
+  })
+  .listen(3001);
+
+console.log("API running on http://localhost:3001");
--- a/environments/hack0/DeepSacrifice/server/llm/llm_feedback.ts
+++ b/environments/hack0/DeepSacrifice/server/llm/llm_feedback.ts
@ -0,0 +1,76 @@
+/**
+ * LLM-based scoring of move aggression/brilliance and sacrifice justification.
+ * Every function in this file sends its prompt to OpenAI through callOpenAI.
+ */
+import { callOpenAI } from "./openai_client";
+
+/**
+ * Asks the LLM to rate how aggressive or brilliant one move of a game is.
+ *
+ * @param fenHistory FEN strings of the game, listed in the prompt numbered from 1.
+ * @param moveIdx Zero-based index of the move; shown to the model as `moveIdx + 1`.
+ * @param moveSAN Notation of the move, inserted verbatim into the prompt.
+ * @returns The model's reply as returned by callOpenAI (requested to be a
+ *   rating from 1 to 10; the reply is not validated here).
+ */
+export async function scoreMoveAggression(
+  fenHistory: string[],
+  moveIdx: number,
+  moveSAN: string,
+): Promise<string> {
+  // One "<n>: <fen>" line per position.
+  const numberedFens = fenHistory
+    .map((position, pos) => `${pos + 1}: ${position}`)
+    .join("\n");
+  const userPrompt = `Given the following chess game FEN history (one FEN per move):\n${numberedFens}\nEvaluate the aggression or brilliance of move #${moveIdx + 1} (${moveSAN}). Respond ONLY with a single digit from 1 (not aggressive) to 10 (extremely aggressive/brilliant). Be fast and concise.`;
+  // The reply is capped at 16 tokens because only a short rating is expected.
+  const rating = await callOpenAI(
+    [
+      {
+        role: "system",
+        content:
+          "You are a chess grandmaster evaluating move aggression and brilliance. Respond as quickly and concisely as possible.",
+      },
+      { role: "user", content: userPrompt },
+    ],
+    16,
+  );
+  return rating;
+}
+
+/**
+ * Asks the LLM whether the sacrifice made by one move of a game was justified.
+ *
+ * @param fenHistory FEN strings of the game, listed in the prompt numbered from 1.
+ * @param moveIdx Zero-based index of the move; shown to the model as `moveIdx + 1`.
+ * @param moveSAN Notation of the move, inserted verbatim into the prompt.
+ * @returns The model's reply as returned by callOpenAI (requested to be one
+ *   short sentence).
+ */
+export async function justifySacrifice(
+  fenHistory: string[],
+  moveIdx: number,
+  moveSAN: string,
+): Promise<string> {
+  // One "<n>: <fen>" line per position.
+  const numberedFens = fenHistory
+    .map((position, pos) => `${pos + 1}: ${position}`)
+    .join("\n");
+  const userPrompt = `Given the following chess game FEN history (one FEN per move):\n${numberedFens}\nWas the sacrifice in move #${moveIdx + 1} (${moveSAN}) justified? Reply in 1 short sentence: is the sacrifice justified or not, and why. Be fast and concise.`;
+  // NOTE(review): the 16-token cap may cut a full sentence short — confirm it
+  // is large enough for the requested answer.
+  const verdict = await callOpenAI(
+    [
+      {
+        role: "system",
+        content:
+          "You are a chess grandmaster evaluating sacrifices. Respond as quickly and concisely as possible.",
+      },
+      { role: "user", content: userPrompt },
+    ],
+    16,
+  );
+  return verdict;
+}
+
+/**
+ * Removes a surrounding Markdown code fence (``` or ```json) from an LLM
+ * reply, returning the trimmed inner text; unfenced replies are only trimmed.
+ */
+function stripCodeFence(text: string): string {
+  const trimmed = text.trim();
+  const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
+  return fenced ? fenced[1] : trimmed;
+}
+
+/**
+ * Scores a batch of moves in a single LLM call.
+ *
+ * @param fenHistory FEN strings, one per move, listed in the prompt numbered from 1.
+ * @param moveSANs Move notations, one per move, in the same order.
+ * @returns The array parsed from the model's JSON reply. Each element is
+ *   expected to have `score` and `justification`, but elements are not
+ *   validated individually.
+ * @throws Error when the reply is not valid JSON or is not a JSON array; the
+ *   message contains the raw reply.
+ */
+export async function scoreAndJustifyGame(
+  fenHistory: string[],
+  moveSANs: string[],
+): Promise<{ score: string; justification: string }[]> {
+  const prompt = `Given the following chess game FEN history (one FEN per move) and the corresponding SAN moves, evaluate each move for aggression/brilliance and sacrifice justification.\n\nFEN history (one per move):\n${fenHistory.map((f, i) => `${i + 1}: ${f}`).join("\n")}\n\nSAN moves (one per move):\n${moveSANs.map((san, i) => `${i + 1}: ${san}`).join("\n")}\n\nFor each move, respond with a JSON array of objects, each with:\n- score: a single digit from 1 (not aggressive) to 10 (extremely aggressive/brilliant)\n- justification: 1 short sentence on whether the move is a justified sacrifice or not, and why.\n\nExample:\n[{"score": "7", "justification": "The sacrifice is risky but justified."}, ...]\n\nRespond ONLY with the JSON array, nothing else.`;
+  const response = await callOpenAI(
+    [
+      {
+        role: "system",
+        content:
+          "You are a chess grandmaster evaluating a full game for aggression and sacrifice justification. Respond as quickly and concisely as possible.",
+      },
+      { role: "user", content: prompt },
+    ],
+    512,
+  );
+  let parsed: unknown;
+  try {
+    // Models sometimes wrap JSON in a code fence despite the instruction not
+    // to; tolerate that instead of failing the whole request.
+    parsed = JSON.parse(stripCodeFence(response));
+  } catch {
+    throw new Error("Failed to parse LLM response as JSON: " + response);
+  }
+  // Valid JSON that is not an array is reported with the same message.
+  if (!Array.isArray(parsed)) {
+    throw new Error("Failed to parse LLM response as JSON: " + response);
+  }
+  return parsed;
+}
--- a/environments/hack0/DeepSacrifice/server/llm/openai_client.ts
+++ b/environments/hack0/DeepSacrifice/server/llm/openai_client.ts
@ -0,0 +1,31 @@
+/**
+ * OpenAI API client for GPT-4o-mini feedback.
+ * Expects to run in Bun/Node.js where process.env is available.
+ * If you see a type error for 'process', install @types/node as a dev dependency.
+ */
+
+import OpenAI from "openai";
+import type { ChatCompletionMessageParam } from "openai/resources/chat/completions";
+
+// API key read from the environment once, when this module is loaded; an
+// unset or empty variable becomes "".
+const OPENAI_API_KEY = process.env.OPENAI_API_KEY || "";
+// Model name sent with every request made through callOpenAI.
+// NOTE(review): no fallback to gpt-4o is implemented in this file.
+const MODEL = "gpt-4o-mini";
+
+// Fail fast: importing this module without a key throws immediately, before
+// the client below is constructed.
+if (!OPENAI_API_KEY) {
+  throw new Error("OPENAI_API_KEY is not set");
+}
+
+// Single client instance shared by all callOpenAI calls.
+const openai = new OpenAI({ apiKey: OPENAI_API_KEY });
+
+/**
+ * Sends a chat completion request and returns the text of the first choice.
+ *
+ * @param messages Chat messages forwarded unchanged to the API.
+ * @param max_tokens Token cap forwarded to the API; defaults to 64.
+ * @returns The first choice's message content with surrounding whitespace trimmed.
+ * @throws Error when the first choice is missing or has empty content.
+ */
+export async function callOpenAI(
+  messages: ChatCompletionMessageParam[],
+  max_tokens = 64,
+): Promise<string> {
+  const completion = await openai.chat.completions.create({
+    model: MODEL,
+    messages: messages,
+    max_tokens: max_tokens,
+  });
+  const firstChoice = completion.choices[0];
+  const text = firstChoice?.message?.content;
+  if (text) {
+    return text.trim();
+  }
+  throw new Error("No content returned from OpenAI");
+}
--- a/environments/hack0/DeepSacrifice/server/reward/reward_fn.ts
+++ b/environments/hack0/DeepSacrifice/server/reward/reward_fn.ts
@ -0,0 +1,14 @@
+/**
+ * Computes the reward for a move.
+ *
+ * Currently a stub: all four arguments are ignored and the same constant is
+ * returned for every call.
+ *
+ * @param fen Position associated with the move (unused for now).
+ * @param moveSAN Notation of the move (unused for now).
+ * @param llmScore Optional LLM score for the move (unused for now).
+ * @param llmJustification Optional LLM justification text (unused for now).
+ * @returns Always 0.5.
+ */
+export async function computeReward(
+  fen: string,
+  moveSAN: string,
+  llmScore?: string,
+  llmJustification?: string,
+): Promise<number> {
+  // TODO: Use real LLM feedback to compute reward
+  const placeholderReward = 0.5;
+  return placeholderReward;
+}
--- a/environments/hack0/DeepSacrifice/server/runner/async_loop.ts
+++ b/environments/hack0/DeepSacrifice/server/runner/async_loop.ts
@ -0,0 +1,18 @@
+import { getAggressiveMove } from "../agents/attacker_agent";
+import { ChessEnv } from "../env/chess_env";
+
+/**
+ * Plays `episodes` self-play games in which every move comes from
+ * getAggressiveMove, logging the final FEN of each game.
+ *
+ * A game stops when the environment reports it is done or when
+ * getAggressiveMove returns no move.
+ *
+ * @param episodes Number of games to play; defaults to 1.
+ */
+export async function runTrainingLoop(episodes = 1) {
+  const env = new ChessEnv();
+  let episode = 0;
+  while (episode < episodes) {
+    env.reset();
+    for (;;) {
+      const nextMove = getAggressiveMove(env.getFEN());
+      if (!nextMove) break;
+      if (env.step(nextMove).done) break;
+    }
+    episode += 1;
+    // Log or store game
+    console.log(`Episode ${episode} finished. FEN: ${env.getFEN()}`);
+  }
+}
--- a/environments/hack0/DeepSacrifice/server/runner/user_game_loop.ts
+++ b/environments/hack0/DeepSacrifice/server/runner/user_game_loop.ts
@ -0,0 +1,61 @@
+import { Chess } from "chess.js";
+import { AttackerAgent } from "../agents/attacker_agent";
+import { ChessEnv } from "../env/chess_env";
+
+/**
+ * Stand-in for a human player: returns a uniformly random legal move for the
+ * position in `fen`, or null when there are no legal moves.
+ * NOTE(review): only from/to are returned, so a promotion piece is not
+ * carried over — confirm how ChessEnv.step treats promotion moves.
+ */
+function getRandomUserMove(fen: string): { from: string; to: string } | null {
+  const legalMoves = new Chess(fen).moves({ verbose: true });
+  if (legalMoves.length === 0) {
+    return null;
+  }
+  const { from, to } =
+    legalMoves[Math.floor(Math.random() * legalMoves.length)];
+  return { from, to };
+}
+
+/**
+ * Plays one game between a simulated user (random legal moves) and an
+ * AttackerAgent, alternating turns with the user moving first.
+ *
+ * The game ends when the environment reports it is done, when the side to
+ * move has no move, or after 100 moves. Each move is logged and recorded
+ * together with placeholder LLM feedback; the records are logged at the end.
+ */
+export async function runUserVsAgentGame() {
+  const env = new ChessEnv();
+  const agent = new AttackerAgent();
+  const gameData: any[] = [];
+  let fen = env.reset();
+  let userTurn = true;
+  let moveCount = 0;
+  let done = false;
+
+  while (!done && moveCount < 100) {
+    const player = userTurn ? "user" : "agent";
+    // The user side is simulated; real user input would replace this call.
+    const move = userTurn ? getRandomUserMove(fen) : agent.getMove(fen);
+    console.log(userTurn ? `User move:` : `Agent move:`, move);
+    if (!move) break;
+
+    // Placeholder for LLM feedback (to be replaced with real call)
+    const llmFeedback = { score: 7, justification: "Placeholder feedback" };
+    const stepResult = env.step(move);
+    moveCount += 1;
+    gameData.push({
+      moveNumber: moveCount,
+      player,
+      move,
+      fen: stepResult.fen,
+      reward: stepResult.reward,
+      llmFeedback,
+    });
+    console.log("FEN:", stepResult.fen);
+    console.log("Reward:", stepResult.reward);
+
+    // Advance to the new position and hand the turn to the other side.
+    fen = stepResult.fen;
+    done = stepResult.done;
+    userTurn = !userTurn;
+  }
+  console.log(`User-vs-Agent game finished after ${moveCount} moves.`);
+  console.log("Game data:", gameData);
+}
+
+// Plays one game as soon as this module is evaluated.
+// NOTE(review): there is no "run directly" guard here, so importing this file
+// also starts a game, and a rejection from runUserVsAgentGame is not handled.
+(async () => {
+  await runUserVsAgentGame();
+})();