mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-23 16:54:56 +00:00
Move to subfolder
This commit is contained in:
parent
a88e3afddf
commit
7eae51cc5c
23 changed files with 0 additions and 0 deletions
|
|
@ -0,0 +1,46 @@
|
|||
import { Chess } from "chess.js";
|
||||
|
||||
/**
 * Picks a move for the side to move in `fen`, favouring forcing play.
 *
 * Selection order:
 * 1. a random capture (regular or en passant), if any exists;
 * 2. otherwise a random move that leaves the opponent in check;
 * 3. otherwise a random legal move.
 *
 * @param fen - Position to move from, in Forsyth-Edwards Notation.
 * @returns The origin and destination squares of the chosen move, or `null`
 *   when the side to move has no legal moves. The promotion piece is not part
 *   of the returned shape.
 */
export function getAggressiveMove(
  fen: string,
): { from: string; to: string } | null {
  const chess = new Chess(fen);
  const moves = chess.moves({ verbose: true });
  if (moves.length === 0) return null;

  // Draws one entry at random and reduces it to the public { from, to } shape.
  const pickRandom = (pool: typeof moves): { from: string; to: string } => {
    const chosen = pool[Math.floor(Math.random() * pool.length)];
    return { from: chosen.from, to: chosen.to };
  };

  // Prefer captures: in chess.js flag strings "c" marks a capture and "e" an
  // en passant capture.
  const captures = moves.filter(
    (m) => m.flags.includes("c") || m.flags.includes("e"),
  );
  if (captures.length > 0) return pickRandom(captures);

  // Prefer checks: play each candidate on the scratch board, look at the
  // resulting position, then take the move back.
  const checks = moves.filter((m) => {
    // The promotion piece must be forwarded: a verbose move list contains one
    // entry per promotion piece, and chess.js rejects a promoting pawn move
    // that does not say which piece it promotes to.
    chess.move({ from: m.from, to: m.to, promotion: m.promotion });
    const isCheck = chess.inCheck();
    chess.undo();
    return isCheck;
  });
  if (checks.length > 0) return pickRandom(checks);

  // Otherwise, pick random
  return pickRandom(moves);
}
|
||||
|
||||
/**
 * Bare-bones agent: move selection is delegated to getAggressiveMove and the
 * learning hook is still an empty placeholder.
 */
export class AttackerAgent {
  /**
   * Chooses the agent's move for a position.
   *
   * @param fen - Position to move from.
   * @returns The result of getAggressiveMove for `fen` (a from/to pair or null).
   */
  getMove(fen: string): { from: string; to: string } | null {
    const chosen = getAggressiveMove(fen);
    return chosen;
  }

  /**
   * Learning hook invoked with a game and its LLM feedback. Currently a no-op:
   * neither argument is read.
   */
  learnFromGame(gameData: any, llmFeedback: any): void {
    // TODO: Implement incremental learning from LLM feedback
    // For now, this is a placeholder
    return;
  }
}
|
||||
149
environments/hack0/DeepSacrifice/server/api/serve.ts
Normal file
149
environments/hack0/DeepSacrifice/server/api/serve.ts
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
import { Elysia } from "elysia";
|
||||
import { getAggressiveMove } from "../agents/attacker_agent";
|
||||
import { ChessEnv } from "../env/chess_env";
|
||||
import { scoreAndJustifyGame } from "../llm/llm_feedback";
|
||||
import { computeReward } from "../reward/reward_fn";
|
||||
|
||||
/** LLM verdict attached to a move; both fields are null until the move has been scored. */
interface LlmFeedback {
  score: string | null;
  justification: string | null;
}

/** One played move plus the FEN reported by the environment after it. */
interface MoveRecord {
  fen: string;
  move: { from: string; to: string; san?: string };
  reward: number | null;
  llmFeedback: LlmFeedback;
}

// Process-wide state: one shared environment, the archive of finished games,
// and the move log of the game currently being played.
const env = new ChessEnv();
const games: MoveRecord[][] = [];
let currentGame: MoveRecord[] = [];

/** Creates a log entry whose reward and LLM feedback are still unfilled. */
function unscoredRecord(fen: string, move: MoveRecord["move"]): MoveRecord {
  return {
    fen,
    move,
    reward: null, // Placeholder, to be filled after scoring
    llmFeedback: { score: null, justification: null },
  };
}

/**
 * Stores a copy of the in-progress game in `games`, starts an empty log, and
 * returns the finished game's moves.
 */
function archiveCurrentGame(): MoveRecord[] {
  const finished = currentGame;
  games.push([...finished]);
  currentGame = [];
  return finished;
}

/** Text label for a move: its SAN when one was supplied, otherwise "from-to". */
function moveLabel(move: MoveRecord["move"]): string {
  return move.san || `${move.from}-${move.to}`;
}

const app = new Elysia()
  .get("/ping", () => "pong")
  // The five most recently finished games.
  .get("/api/games/latest", () => games.slice(-5))
  // Plays the caller's move, then lets the agent answer in the same request.
  .post("/api/move", async ({ body }) => {
    const { from, to, san, color } = (body ?? {}) as {
      from?: string;
      to?: string;
      san?: string;
      color?: string;
    };
    // Side to move is the second space-separated field of the FEN.
    const currentColor =
      env.getFEN().split(" ")[1] === "b" ? "black" : "white";
    if (color && color !== currentColor) {
      return { error: `It's not ${color}'s turn.` };
    }
    if (typeof from !== "string" || typeof to !== "string") {
      return { error: "Missing or invalid move: 'from' and 'to' are required." };
    }

    // User move
    // NOTE(review): how env.step reacts to an illegal move is not visible
    // from this file - confirm it rejects the move rather than recording it.
    const { fen: userFen, done: userDone } = env.step({ from, to });
    const userMoveData = unscoredRecord(userFen, { from, to, san });
    currentGame.push(userMoveData);
    // If game is over after user move, return the whole game
    if (userDone) {
      return { moves: archiveCurrentGame(), done: true };
    }

    // Agent reply; null means the agent found no legal move.
    const agentMove = getAggressiveMove(userFen);
    if (!agentMove) {
      return { moves: [userMoveData], done: false };
    }
    const { fen: agentFen, done: agentDone } = env.step(agentMove);
    const agentMoveData = unscoredRecord(agentFen, agentMove);
    currentGame.push(agentMoveData);
    if (agentDone) {
      return { moves: archiveCurrentGame(), done: true };
    }
    return { moves: [userMoveData, agentMoveData], done: false };
  })
  // .get("/evaluate", async () => {
  //   const fen = env.getFEN();
  //   const evalResult = await evaluatePosition(fen);
  //   return evalResult;
  // })
  // Stub endpoints: training is not wired up and avgReward is hard-coded.
  .post("/api/train/start", () => ({ started: true }))
  .get("/api/agent/status", () => ({ gamesPlayed: games.length, avgReward: 0 }))
  // Resets the environment and discards the in-progress move log.
  .post("/api/reset", () => {
    env.reset();
    currentGame = [];
    return { fen: env.getFEN() };
  })
  .post("/api/game/llm_feedback", async ({ body }) => {
    // Expects: { moves: [{ fen, move: { from, to, san } }] }
    const { moves } = (body ?? {}) as {
      moves: {
        fen: string;
        move: { from: string; to: string; san?: string };
      }[];
    };
    if (!Array.isArray(moves)) {
      return { error: "Missing or invalid moves array" };
    }

    // Only agent moves are scored. The user moves first, so the agent's
    // moves sit at the odd 0-based indices (1, 3, 5, ...).
    const isAgentMove = (idx: number): boolean => idx % 2 === 1;
    const agentMoves = moves.filter((_, idx) => isAgentMove(idx));

    let feedbackArr: LlmFeedback[] = [];
    try {
      feedbackArr = await scoreAndJustifyGame(
        agentMoves.map((m) => m.fen),
        agentMoves.map((m) => moveLabel(m.move)),
      );
    } catch (e) {
      return { error: "LLM feedback failed", details: String(e) };
    }

    const scoredMoves = await Promise.all(
      moves.map(async (moveData, idx) => {
        if (!isAgentMove(idx)) {
          // User move: leave feedback/reward as null
          return {
            ...moveData,
            reward: null,
            llmFeedback: { score: null, justification: null },
          };
        }
        // The k-th agent move (at index 2k + 1) takes the k-th feedback entry.
        // `||` rather than `??` so that any falsy entry from a malformed LLM
        // reply is replaced by the empty feedback.
        const feedback = feedbackArr[(idx - 1) / 2] || {
          score: null,
          justification: null,
        };
        const reward = await computeReward(
          moveData.fen,
          moveLabel(moveData.move),
          feedback.score ?? "",
          feedback.justification ?? "",
        );
        return {
          ...moveData,
          reward,
          llmFeedback: feedback,
        };
      }),
    );
    return { moves: scoredMoves };
  })
  .listen(3001);

console.log("API running on http://localhost:3001");
|
||||
76
environments/hack0/DeepSacrifice/server/llm/llm_feedback.ts
Normal file
76
environments/hack0/DeepSacrifice/server/llm/llm_feedback.ts
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
/**
 * LLM-based move aggression/brilliance scoring and sacrifice justification.
 * Prompts are sent to the OpenAI chat model through callOpenAI.
 */
|
||||
import { callOpenAI } from "./openai_client";
|
||||
|
||||
/**
 * Asks the LLM to rate how aggressive or brilliant one move of a game was.
 *
 * @param fenHistory - Game positions as FEN strings; listed in the prompt numbered from 1.
 * @param moveIdx - Zero-based index of the move; the prompt calls it move #`moveIdx + 1`.
 * @param moveSAN - The move text shown to the model.
 * @returns The reply text from callOpenAI, unparsed.
 */
export async function scoreMoveAggression(
  fenHistory: string[],
  moveIdx: number,
  moveSAN: string,
): Promise<string> {
  const numberedFens = fenHistory
    .map((fen, pos) => `${pos + 1}: ${fen}`)
    .join("\n");
  const userPrompt = [
    "Given the following chess game FEN history (one FEN per move):",
    numberedFens,
    `Evaluate the aggression or brilliance of move #${moveIdx + 1} (${moveSAN}). Respond ONLY with a single digit from 1 (not aggressive) to 10 (extremely aggressive/brilliant). Be fast and concise.`,
  ].join("\n");
  const systemPrompt =
    "You are a chess grandmaster evaluating move aggression and brilliance. Respond as quickly and concisely as possible.";

  // The reply is expected to be a bare number, so a small token cap suffices.
  return await callOpenAI(
    [
      { role: "system", content: systemPrompt },
      { role: "user", content: userPrompt },
    ],
    16,
  );
}
|
||||
|
||||
/**
 * Asks the LLM whether the sacrifice played in one move of a game was justified.
 *
 * @param fenHistory - Game positions as FEN strings; listed in the prompt numbered from 1.
 * @param moveIdx - Zero-based index of the move; the prompt calls it move #`moveIdx + 1`.
 * @param moveSAN - The move text shown to the model.
 * @returns The reply text from callOpenAI, unparsed.
 */
export async function justifySacrifice(
  fenHistory: string[],
  moveIdx: number,
  moveSAN: string,
): Promise<string> {
  const prompt = `Given the following chess game FEN history (one FEN per move):\n${fenHistory.map((f, i) => `${i + 1}: ${f}`).join("\n")}\nWas the sacrifice in move #${moveIdx + 1} (${moveSAN}) justified? Reply in 1 short sentence: is the sacrifice justified or not, and why. Be fast and concise.`;
  const response = await callOpenAI(
    [
      {
        role: "system",
        content:
          "You are a chess grandmaster evaluating sacrifices. Respond as quickly and concisely as possible.",
      },
      { role: "user", content: prompt },
    ],
    // The prompt asks for a full sentence with a reason; a 16-token cap cuts
    // such an answer off mid-sentence, so allow room for one short sentence.
    64,
  );
  return response;
}
|
||||
|
||||
/**
 * Scores every listed move of a game in a single LLM call.
 *
 * The model is asked for a JSON array with one `{ score, justification }`
 * object per move. A reply wrapped in a Markdown code fence is accepted.
 * The array's entries are returned as parsed; their shape is not validated.
 *
 * @param fenHistory - One FEN per move; listed in the prompt numbered from 1.
 * @param moveSANs - One move text per move, in the same order as `fenHistory`.
 * @returns The parsed JSON array from the model's reply.
 * @throws Error when the reply is not valid JSON or is not a JSON array; the
 *   message contains the raw reply.
 */
export async function scoreAndJustifyGame(
  fenHistory: string[],
  moveSANs: string[],
): Promise<{ score: string; justification: string }[]> {
  const prompt = `Given the following chess game FEN history (one FEN per move) and the corresponding SAN moves, evaluate each move for aggression/brilliance and sacrifice justification.\n\nFEN history (one per move):\n${fenHistory.map((f, i) => `${i + 1}: ${f}`).join("\n")}\n\nSAN moves (one per move):\n${moveSANs.map((san, i) => `${i + 1}: ${san}`).join("\n")}\n\nFor each move, respond with a JSON array of objects, each with:\n- score: a single digit from 1 (not aggressive) to 10 (extremely aggressive/brilliant)\n- justification: 1 short sentence on whether the move is a justified sacrifice or not, and why.\n\nExample:\n[{"score": "7", "justification": "The sacrifice is risky but justified."}, ...]\n\nRespond ONLY with the JSON array, nothing else.`;

  // A fixed cap truncates the JSON for longer games (one object per move),
  // which then fails to parse. Grow the budget with the number of moves and
  // never go below the previous 512.
  const maxTokens = Math.max(512, moveSANs.length * 64);

  const response = await callOpenAI(
    [
      {
        role: "system",
        content:
          "You are a chess grandmaster evaluating a full game for aggression and sacrifice justification. Respond as quickly and concisely as possible.",
      },
      { role: "user", content: prompt },
    ],
    maxTokens,
  );

  // Chat models often wrap JSON in a ``` or ```json fence even when told not
  // to; unwrap it before parsing.
  const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(response.trim());
  const payload = fenced?.[1] ?? response;

  let parsed: unknown;
  try {
    parsed = JSON.parse(payload);
  } catch {
    throw new Error("Failed to parse LLM response as JSON: " + response);
  }
  if (!Array.isArray(parsed)) {
    throw new Error("Failed to parse LLM response as JSON: " + response);
  }
  return parsed;
}
|
||||
31
environments/hack0/DeepSacrifice/server/llm/openai_client.ts
Normal file
31
environments/hack0/DeepSacrifice/server/llm/openai_client.ts
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
/**
|
||||
* OpenAI API client for GPT-4o-mini feedback.
|
||||
* Expects to run in Bun/Node.js where process.env is available.
|
||||
* If you see a type error for 'process', install @types/node as a dev dependency.
|
||||
*/
|
||||
|
||||
import OpenAI from "openai";
|
||||
import type { ChatCompletionMessageParam } from "openai/resources/chat/completions";
|
||||
|
||||
// Chat model requested for every completion issued through this module.
const MODEL = "gpt-4o-mini";

// The key is read once at module load; an unset variable collapses to "".
const OPENAI_API_KEY = process.env.OPENAI_API_KEY || "";
if (OPENAI_API_KEY === "") {
  // Fail at import time instead of on the first request.
  throw new Error("OPENAI_API_KEY is not set");
}

// Shared client instance used by callOpenAI.
const openai = new OpenAI({ apiKey: OPENAI_API_KEY });
|
||||
|
||||
/**
 * Sends a chat conversation to the configured model and returns its reply text.
 *
 * @param messages - Conversation to send.
 * @param max_tokens - Completion token cap passed to the API (default 64).
 * @returns The first choice's message content with surrounding whitespace trimmed.
 * @throws Error("No content returned from OpenAI") when the first choice is
 *   missing or its content is empty.
 */
export async function callOpenAI(
  messages: ChatCompletionMessageParam[],
  max_tokens = 64,
): Promise<string> {
  const completion = await openai.chat.completions.create({
    model: MODEL,
    messages: messages,
    max_tokens: max_tokens,
  });

  const firstChoice = completion.choices[0];
  const text = firstChoice?.message?.content;
  if (text) {
    return text.trim();
  }
  throw new Error("No content returned from OpenAI");
}
|
||||
14
environments/hack0/DeepSacrifice/server/reward/reward_fn.ts
Normal file
14
environments/hack0/DeepSacrifice/server/reward/reward_fn.ts
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
/**
 * Stub reward function: every move currently earns the same fixed reward.
 *
 * The position, move and LLM feedback arguments are accepted so callers can
 * already pass them, but none of them is read yet.
 *
 * @returns A promise resolving to the constant 0.5.
 */
export async function computeReward(
  fen: string,
  moveSAN: string,
  llmScore?: string,
  llmJustification?: string,
): Promise<number> {
  // TODO: Use real LLM feedback to compute reward
  const placeholderReward = 0.5;
  return placeholderReward;
}
|
||||
18
environments/hack0/DeepSacrifice/server/runner/async_loop.ts
Normal file
18
environments/hack0/DeepSacrifice/server/runner/async_loop.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import { getAggressiveMove } from "../agents/attacker_agent";
|
||||
import { ChessEnv } from "../env/chess_env";
|
||||
|
||||
/**
 * Plays `episodes` self-play games in which every move comes from
 * getAggressiveMove, logging the final FEN of each game.
 *
 * @param episodes - Number of games to play (default 1).
 */
export async function runTrainingLoop(episodes = 1) {
  const board = new ChessEnv();
  let episode = 0;

  while (episode < episodes) {
    board.reset();

    // Keep moving until no move is available or the environment reports done.
    for (;;) {
      const next = getAggressiveMove(board.getFEN());
      if (!next) break;
      if (board.step(next).done) break;
    }

    episode += 1;
    // Log or store game
    console.log(`Episode ${episode} finished. FEN: ${board.getFEN()}`);
  }
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
import { Chess } from "chess.js";
|
||||
import { AttackerAgent } from "../agents/attacker_agent";
|
||||
import { ChessEnv } from "../env/chess_env";
|
||||
|
||||
// Stand-in for a human player: returns one legal move picked at random,
// or null when the position has no legal moves.
function getRandomUserMove(fen: string): { from: string; to: string } | null {
  const legalMoves = new Chess(fen).moves({ verbose: true });
  const total = legalMoves.length;
  if (total === 0) {
    return null;
  }
  const { from, to } = legalMoves[Math.floor(Math.random() * total)];
  return { from, to };
}
|
||||
|
||||
/**
 * Simulates one game between a random-moving "user" (who moves first) and an
 * AttackerAgent, alternating turns for at most 100 moves in total.
 *
 * Each move, the resulting FEN and the environment's reward are logged; a
 * summary and the full move log are printed at the end. The LLM feedback
 * stored with each move is a fixed placeholder.
 */
export async function runUserVsAgentGame() {
  const env = new ChessEnv();
  const agent = new AttackerAgent();
  const gameData: any[] = [];
  const maxMoves = 100;

  let position = env.reset();
  let finished = false;

  // gameData holds one entry per move played, so its length doubles as the
  // move counter and its parity tells whose turn it is.
  while (!finished && gameData.length < maxMoves) {
    const isUserTurn = gameData.length % 2 === 0;
    const player = isUserTurn ? "user" : "agent";
    const move = isUserTurn
      ? getRandomUserMove(position) // Placeholder for real user input
      : agent.getMove(position);
    console.log(isUserTurn ? `User move:` : `Agent move:`, move);

    // No move available for the side to play: stop the game.
    if (!move) break;

    // Placeholder for LLM feedback (to be replaced with real call)
    const llmFeedback = { score: 7, justification: "Placeholder feedback" };
    const { fen: nextFen, reward, done: gameOver } = env.step(move);

    gameData.push({
      moveNumber: gameData.length + 1,
      player,
      move,
      fen: nextFen,
      reward,
      llmFeedback,
    });
    console.log("FEN:", nextFen);
    console.log("Reward:", reward);

    position = nextFen;
    finished = gameOver;
  }

  console.log(`User-vs-Agent game finished after ${gameData.length} moves.`);
  console.log("Game data:", gameData);
}
|
||||
|
||||
// Plays one simulated game as soon as this module is evaluated. There is no
// "run directly" guard, so importing the module triggers the game as well.
void (async function main() {
  await runUserVsAgentGame();
})();
|
||||
Loading…
Add table
Add a link
Reference in a new issue