From 72327cfb2276b24b8e20c5263515b53c10597c55 Mon Sep 17 00:00:00 2001 From: AlxAI Date: Thu, 20 Feb 2025 15:58:41 -0800 Subject: [PATCH 1/6] Randomization for powers and models + enhance order instructions Also improved plotting to show model + power --- ai_diplomacy/clients.py | 2 +- ai_diplomacy/prompts/order_instructions.txt | 47 +++++++++++++++++++++ ai_diplomacy/utils.py | 33 +++++++++------ diplomacy/engine/game.py | 2 + lm_game.py | 2 +- plotting.ipynb | 4 +- 6 files changed, 74 insertions(+), 16 deletions(-) diff --git a/ai_diplomacy/clients.py b/ai_diplomacy/clients.py index 7917316..0547f7d 100644 --- a/ai_diplomacy/clients.py +++ b/ai_diplomacy/clients.py @@ -709,7 +709,7 @@ class LMServiceVersus: """ def __init__(self): - self.power_model_map = assign_models_to_powers() + self.power_model_map = assign_models_to_powers(randomize=True) def get_orders_for_power(self, game, power_name): model_id = self.power_model_map.get(power_name, "o3-mini") diff --git a/ai_diplomacy/prompts/order_instructions.txt b/ai_diplomacy/prompts/order_instructions.txt index 338f8a6..f3a2616 100644 --- a/ai_diplomacy/prompts/order_instructions.txt +++ b/ai_diplomacy/prompts/order_instructions.txt @@ -2,6 +2,53 @@ You are now to submit an order for your units. Remember that your goal is to win via capturing supply centers. There are opportunity costs in this game. +1. Understanding the Phases & Their Orders + +1.1. 
Movement Phase (phase_type == 'M') + • Hold: A PAR H (Army in Paris does nothing) + • Move: A PAR - BUR (Army in Paris moves to Burgundy) + • Support: + • Support Hold: A MAR S A PAR H (Army in Marseilles supports Army in Paris to hold) + • Support Move: A MAR S A PAR - BUR (Army in Marseilles supports Army in Paris moving to Burgundy) + • Convoy: Fleets at sea can convoy an Army over water: + • Fleet Convoy: F ION C A TUN - NAP (Fleet in Ionian Sea convoys Army from Tunis to Naples) + • Army Move via Convoy: A TUN - NAP VIA (explicitly states the Army is moving from Tunis to Naples via convoy) + +1.2. Retreat Phase (phase_type == 'R') + • If a unit is dislodged, it must Retreat or Disband: + • Retreat: A BUR R PIC (Dislodged Army in Burgundy retreats to Picardy) + • Disband: A BUR D (Army in Burgundy disbands, if it cannot retreat or chooses not to) + +1.3. Adjustment Phase (phase_type == 'A') + • Build new units if you have more centers than current units: + • A PAR B (Build an Army in Paris) + • F MAR B (Build a Fleet in Marseilles) + • Remove units if you have fewer centers than current units: + • A BUR D (Disband Army in Burgundy) + • Waive a build if you have a surplus but don’t want/can’t build: + • WAIVE (no unit is built in the available build location) + +1.4. Order Types + • H (Hold) – e.g. A PAR H + • - (Move) – e.g. A PAR - BUR + • S (Support) – e.g. A MAR S A PAR - BUR or A MAR S A PAR H + • C (Convoy) – e.g. F ION C A TUN - NAP + • R (Retreat) – e.g. A BUR R PIC + • D (Disband) – e.g. A BUR D + • B (Build) – e.g. A PAR B + • WAIVE – skipping a possible build + +1.5. Key Phase Context + • Movement (M): Units can H, -, S, C. + • Retreat (R): Dislodged units can only R or D. + • Adjustment (A): Build/Remove units or WAIVE. + • Multi-Coast: For SPA, STP, BUL, specify nc, sc, or ec when using Fleets, e.g. F BRE - SPA(sc). + • Basic Validity Rules + • No self-support (A PAR S A PAR - BUR is invalid). + • Fleets must be on water to convoy. 
+ • Army “- X VIA” must have one or more fleets issuing matching C A ... - X. + + IMPORTANT: 1. Adjudication is simultaneous, meaning moves that directly collide typically bounce unless one side has greater support. 2. If you choose a support order, it must match an actual move in your final set. For instance, "A VIE S F TRI - VEN" requires "A VIE - VEN". "F TRI - VEN" must also occur for the move to be successful, but this can be ordered by either yourself or an ally. diff --git a/ai_diplomacy/utils.py b/ai_diplomacy/utils.py index fbbfd4b..b54ebff 100644 --- a/ai_diplomacy/utils.py +++ b/ai_diplomacy/utils.py @@ -1,5 +1,6 @@ from dotenv import load_dotenv import logging +import random logger = logging.getLogger("utils") logger.setLevel(logging.INFO) @@ -8,22 +9,30 @@ logging.basicConfig(level=logging.INFO) load_dotenv() -def assign_models_to_powers(): +def assign_models_to_powers(randomize=True): """ Example usage: define which model each power uses. Return a dict: { power_name: model_id, ... } - POWERS = ['AUSTRIA', 'ENGLAND', 'FRANCE', 'GERMANY', 'ITALY', 'RUSSIA', 'TURKEY'] """ - - return { - "FRANCE": "o3-mini", - "GERMANY": "claude-3-5-sonnet-20241022", - "ENGLAND": "gemini-2.0-flash", - "RUSSIA": "gemini-2.0-flash-lite-preview-02-05", - "ITALY": "gpt-4o", - "AUSTRIA": "gpt-4o-mini", - "TURKEY": "claude-3-5-haiku-20241022", - } + # If True, we'll randomize the model assignment. 
+ model_list = [ + "o3-mini", + "claude-3-5-sonnet-20241022", + "gemini-2.0-flash", + "gemini-2.0-flash-lite-preview-02-05", + "gpt-4o", + "gpt-4o-mini", + "claude-3-5-haiku-20241022", + ] + POWERS = ['AUSTRIA', 'ENGLAND', 'FRANCE', 'GERMANY', 'ITALY', 'RUSSIA', 'TURKEY'] + if randomize: + return { + power: random.choice(model_list) for power in POWERS + } + else: + return { + power: model_list[i] for i, power in enumerate(POWERS) + } def gather_possible_orders(game, power_name): diff --git a/diplomacy/engine/game.py b/diplomacy/engine/game.py index 9fab3d9..e030748 100644 --- a/diplomacy/engine/game.py +++ b/diplomacy/engine/game.py @@ -1732,6 +1732,8 @@ class Game(Jsonable): :return: A dictionary with locations as keys, and their respective list of possible orders as values """ # pylint: disable=too-many-branches,too-many-nested-blocks + # Initialize dictionary mapping each location to an empty set of possible orders + # Keys are uppercase location names, values are empty sets that will store valid orders possible_orders = {loc.upper(): set() for loc in self.map.locs} # Game is completed diff --git a/lm_game.py b/lm_game.py index 6acddf3..5cd8212 100644 --- a/lm_game.py +++ b/lm_game.py @@ -133,7 +133,7 @@ def main(): return game.power_model_map = dict(zip(powers_order, provided_models)) else: - game.power_model_map = assign_models_to_powers() + game.power_model_map = assign_models_to_powers(randomize=True) while not game.is_game_done: phase_start = time.time() diff --git a/plotting.ipynb b/plotting.ipynb index 042b94d..293c00c 100644 --- a/plotting.ipynb +++ b/plotting.ipynb @@ -87,7 +87,7 @@ "\n", "# Plot unit counts per country\n", "for country in countries:\n", - " axs[0].plot(turns, unit_counts[country], label=model_map[country])\n", + " axs[0].plot(turns, unit_counts[country], label=f\"{model_map[country]} ({country})\")\n", "axs[0].set_title(\"Unit Counts per Country Over Turns\")\n", "axs[0].set_ylabel(\"Number of Units\")\n", 
"axs[0].set_xlabel(\"Turns\")\n", @@ -97,7 +97,7 @@ "\n", "# Plot center counts per country\n", "for country in countries:\n", - " axs[1].plot(turns, center_counts[country], label=model_map[country])\n", + " axs[1].plot(turns, center_counts[country], label=f\"{model_map[country]} ({country})\")\n", "axs[1].set_title(\"Center Counts per Country Over Turns\")\n", "axs[1].set_ylabel(\"Number of Centers\")\n", "axs[1].set_xlabel(\"Turns\")\n", From 8f61ba06b360b54b1a80139d38f86bd4caff5990 Mon Sep 17 00:00:00 2001 From: AlxAI Date: Thu, 20 Feb 2025 18:22:53 -0800 Subject: [PATCH 2/6] fixed system prompt for summary , made improvements and debugging for summaries too. Much can be optimized still --- ai_diplomacy/clients.py | 50 +++++++------ .../prompts/austria_system_prompt.txt | 22 ++++++ diplomacy/engine/game.py | 60 +++++++++++---- lm_game.py | 73 ++++++++++++++----- 4 files changed, 150 insertions(+), 55 deletions(-) diff --git a/ai_diplomacy/clients.py b/ai_diplomacy/clients.py index 0547f7d..e73c6bc 100644 --- a/ai_diplomacy/clients.py +++ b/ai_diplomacy/clients.py @@ -40,18 +40,24 @@ class BaseModelClient: - get_conversation_reply(power_name, conversation_so_far, game_phase) -> str """ - def __init__(self, model_name: str, power_name: Optional[str] = None): + def __init__(self, model_name: str, power_name: Optional[str] = None, emptysystem: bool = False): self.model_name = model_name self.power_name = power_name - # Load a power-specific system prompt if present, else default - if self.power_name: - try: - self.system_prompt = load_prompt(f"{self.power_name.lower()}_system_prompt.txt") - except FileNotFoundError: - logger.warning(f"No specific system prompt found for {self.power_name}; using default.") + self.emptysystem = emptysystem + + # Conditionally load system prompt + if not self.emptysystem: + if self.power_name: + try: + self.system_prompt = load_prompt(f"{self.power_name.lower()}_system_prompt.txt") + except FileNotFoundError: + logger.warning(f"No 
specific system prompt found for {self.power_name}; using default.") + self.system_prompt = load_prompt("system_prompt.txt") + else: self.system_prompt = load_prompt("system_prompt.txt") else: - self.system_prompt = load_prompt("system_prompt.txt") + # If emptysystem is True, skip loading any system prompt + self.system_prompt = "" def generate_response(self, prompt: str) -> str: """ @@ -486,8 +492,8 @@ class OpenAIClient(BaseModelClient): For 'o3-mini', 'gpt-4o', or other OpenAI model calls. """ - def __init__(self, model_name: str, power_name: Optional[str] = None): - super().__init__(model_name, power_name) + def __init__(self, model_name: str, power_name: Optional[str] = None, emptysystem: bool = False): + super().__init__(model_name, power_name, emptysystem) self.client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) def generate_response(self, prompt: str) -> str: @@ -523,8 +529,8 @@ class ClaudeClient(BaseModelClient): For 'claude-3-5-sonnet-20241022', 'claude-3-5-haiku-20241022', etc. """ - def __init__(self, model_name: str, power_name: Optional[str] = None): - super().__init__(model_name, power_name) + def __init__(self, model_name: str, power_name: Optional[str] = None, emptysystem: bool = False): + super().__init__(model_name, power_name, emptysystem) self.client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")) def generate_response(self, prompt: str) -> str: @@ -559,8 +565,8 @@ class GeminiClient(BaseModelClient): For 'gemini-1.5-flash' or other Google Generative AI models. 
""" - def __init__(self, model_name: str, power_name: Optional[str] = None): - super().__init__(model_name, power_name) + def __init__(self, model_name: str, power_name: Optional[str] = None, emptysystem: bool = False): + super().__init__(model_name, power_name, emptysystem) self.client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY")) def generate_response(self, prompt: str) -> str: @@ -587,8 +593,8 @@ class DeepSeekClient(BaseModelClient): For DeepSeek R1 'deepseek-reasoner' """ - def __init__(self, model_name: str, power_name: Optional[str] = None): - super().__init__(model_name, power_name) + def __init__(self, model_name: str, power_name: Optional[str] = None, emptysystem: bool = False): + super().__init__(model_name, power_name, emptysystem) self.api_key = os.environ.get("DEEPSEEK_API_KEY") self.client = DeepSeekOpenAI( api_key=self.api_key, base_url="https://api.deepseek.com/" @@ -651,22 +657,22 @@ class DeepSeekClient(BaseModelClient): ############################################################################## -def load_model_client(model_id: str, power_name: Optional[str] = None) -> BaseModelClient: +def load_model_client(model_id: str, power_name: Optional[str] = None, emptysystem: bool = False) -> BaseModelClient: """ Returns the appropriate LLM client for a given model_id string, optionally keyed by power_name. 
Example usage: - client = load_model_client("claude-3-5-sonnet-20241022", power_name="FRANCE") + client = load_model_client("claude-3-5-sonnet-20241022", power_name="FRANCE", emptysystem=True) """ lower_id = model_id.lower() if "claude" in lower_id: - return ClaudeClient(model_id, power_name) + return ClaudeClient(model_id, power_name, emptysystem=emptysystem) elif "gemini" in lower_id: - return GeminiClient(model_id, power_name) + return GeminiClient(model_id, power_name, emptysystem=emptysystem) elif "deepseek" in lower_id: - return DeepSeekClient(model_id, power_name) + return DeepSeekClient(model_id, power_name, emptysystem=emptysystem) else: # Default to OpenAI - return OpenAIClient(model_id, power_name) + return OpenAIClient(model_id, power_name, emptysystem=emptysystem) ############################################################################## diff --git a/ai_diplomacy/prompts/austria_system_prompt.txt b/ai_diplomacy/prompts/austria_system_prompt.txt index 8a6b4aa..b54ef83 100644 --- a/ai_diplomacy/prompts/austria_system_prompt.txt +++ b/ai_diplomacy/prompts/austria_system_prompt.txt @@ -1,5 +1,27 @@ You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing supply centers, growing your army, and taking over the map. Be aggressive. +Dear Austria, +They say you're surrounded - but that means you can strike in any direction. History shows the strongest Austrian players turn early vulnerability into mid-game dominance through decisive action, not just survival. +Key insights: + +Prevent Russia-Turkey alliance above all else +Italy must be friend or dead quickly (95% of A/I wars kill both) +Serbia is crucial 1901 - secure it +Galicia bounce often vital Spring 1901 +Central position enables striking anywhere once secured + +Critical mindset: You're not playing to survive - you're playing to explode out from the center. 
Yes, early diplomacy keeps you alive, but it should serve your offensive goals, not replace them. +Paths to victory often require: + +Securing strong early alliance (usually Italy or Russia) +Eliminating one neighbor completely by 1904 +Leveraging central position for unexpected strikes +Breaking stalemate line via Munich/Berlin + +Don't fall into defensive play just because everyone expects it. Stats show Austrian solos often come from players who turn the early "defensive" moves into aggressive positioning by year 3. +Time works against you - the longer you wait, the more likely others unite. Make your decisive moves by mid-game, usually years 3-4. Better to strike imperfectly than wait for perfect alignment. +The throne of Europe awaits. Show them that the "weakest" starting position was merely gathering strength to strike. + You will be given: • Which power you are controlling. • The current phase (e.g. S1901M). diff --git a/diplomacy/engine/game.py b/diplomacy/engine/game.py index e030748..7ec3302 100644 --- a/diplomacy/engine/game.py +++ b/diplomacy/engine/game.py @@ -45,6 +45,11 @@ from diplomacy.utils.game_phase_data import GamePhaseData, MESSAGES_TYPE UNDETERMINED, POWER, UNIT, LOCATION, COAST, ORDER, MOVE_SEP, OTHER = 0, 1, 2, 3, 4, 5, 6, 7 LOGGER = logging.getLogger(__name__) +# set logging level to INFO +logging.basicConfig(level=logging.INFO) +# set logging level to DEBUG +#logging.basicConfig(level=logging.DEBUG) + class Game(Jsonable): """ Game class. 
@@ -1468,6 +1473,9 @@ class Game(Jsonable): self.message_history.put(previous_phase, previous_messages) self.state_history.put(previous_phase, previous_state) + # Now build a key for the *current* (post-process) phase + current_phase_key = self._phase_wrapper_type(self.current_short_phase) + # Generate a text summary (if a callback is provided) phase_summary_text = self._generate_phase_summary( previous_phase, @@ -4575,30 +4583,54 @@ class Game(Jsonable): except (IndexError, KeyError): return f"[_generate_phase_summary] No GamePhaseData found for {phase_key}" - # Log the current phase key and results for debugging + # Log the current phase key, results, and possibly the orders for debugging logging.debug( - "DEBUG _generate_phase_summary: phase_key=%s, results=%s", - phase_key, current_phase_data.results + "DEBUG _generate_phase_summary: current phase_key=%s, results=%s, orders=%s", + phase_key, + current_phase_data.results, + current_phase_data.orders ) - # 2) Attempt to retrieve the PREVIOUS phase data to highlight differences - # We'll do this by checking the index of `phase_key` in `self.state_history`. - # If there's a previous index, we'll fetch that phase_data for comparison. - prev_phase_data = None + # Retrieve the list of all recorded phase keys all_phases = list(self.state_history.keys()) + logging.debug("DEBUG _generate_phase_summary: all_phases=%s", all_phases) + + prev_phase_data = None if str(phase_key) in all_phases: idx = all_phases.index(str(phase_key)) + logging.debug("DEBUG _generate_phase_summary: current phase index=%d", idx) + + # Here we log the logic behind picking the previous phase if idx > 0: - prev_phase_key = all_phases[idx - 1] + prev_phase_key = all_phases[idx - 1] + logging.debug( + "DEBUG _generate_phase_summary: Using prev_phase_key=%s (idx-2). 
If skipping a sub-phase is undesired, consider (idx-1).", + prev_phase_key + ) try: prev_phase_data = self.get_phase_from_history(prev_phase_key) - except: - pass + except Exception as e: + logging.debug("DEBUG _generate_phase_summary: Could not get prev_phase_data for key=%s, error=%s", prev_phase_key, e) + else: + logging.debug("DEBUG _generate_phase_summary: Not enough phases to set prev_phase_key.") + else: + logging.debug("DEBUG _generate_phase_summary: phase_key=%s not in all_phases!", phase_key) - # 3) Gather the big data from current_phase_data - # (We assume you have stored them in current_phase_data.state the usual way.) + # ... [No change in the rest of your existing logic, except we might add extra logs below] ... + + # (After retrieving prev_phase_data, we log a quick summary:) + if prev_phase_data: + logging.debug( + "DEBUG _generate_phase_summary: Found prev_phase_data for key=%s, results=%s, orders=%s", + prev_phase_key, + prev_phase_data.results, + prev_phase_data.orders + ) + + # The rest of the function remains the same, but you can keep adding targeted logs as needed: cur_state = current_phase_data.state - # Typically these keys exist if your get_state() populates them: + logging.debug("DEBUG _generate_phase_summary: cur_state keys=%s", list(cur_state.keys())) + cur_units = cur_state.get('units', {}) cur_centers = cur_state.get('centers', {}) cur_retreats = cur_state.get('retreats', {}) @@ -4703,7 +4735,7 @@ class Game(Jsonable): f"RESULTS:\n{results_block}\n\n" f"CURRENT BOARD STATE:\n{current_state_block}\n\n" f"CHANGES FROM PREVIOUS PHASE:\n{differences_block}\n\n" - "Below is the final board state after the latest phase, along with the moves each power submitted and the engine’s adjudication results. Please create a summary in JSON, explaining:" + "Below is the final board state after the latest phase, along with the moves each power submitted and the engine's adjudication results. 
Please create a summary in JSON, explaining:" "- Each successful move," "- Each bounce or voided order, with reasons (e.g. equal force, no valid route, contradictory support)," "- Key changes in supply centers," diff --git a/lm_game.py b/lm_game.py index 5cd8212..13e5cb0 100644 --- a/lm_game.py +++ b/lm_game.py @@ -34,7 +34,7 @@ logging.basicConfig( def my_summary_callback(system_prompt, user_prompt, model_name): # Route to the desired model specified by the command-line argument - client = load_model_client(model_name) + client = load_model_client(model_name, emptysystem=True) combined_prompt = f"{system_prompt}\n\n{user_prompt}" # Pseudo-code for generating a response: return client.generate_response(combined_prompt) @@ -47,7 +47,7 @@ def parse_arguments(): parser.add_argument( "--max_year", type=int, - default=1910, + default=1905, help="Maximum year to simulate. The game will stop once this year is reached.", ) parser.add_argument( @@ -59,7 +59,7 @@ def parse_arguments(): parser.add_argument( "--num_negotiation_rounds", type=int, - default=5, + default=1, help="Number of negotiation rounds per phase.", ) parser.add_argument( @@ -80,6 +80,42 @@ def parse_arguments(): return parser.parse_args() +def save_game_state(game, result_folder, game_file_path, model_error_stats, args, is_final=False): + """ + Save the current game state and related information + + Args: + game: The diplomacy game instance + result_folder: Path to the results folder + game_file_path: Base path for the game file + model_error_stats: Dictionary containing model error statistics + args: Command line arguments + is_final: Boolean indicating if this is the final save + """ + # Generate unique filename for periodic saves + timestamp = time.strftime("%Y%m%d_%H%M%S") + if not is_final: + output_path = f"{game_file_path}_checkpoint_{timestamp}.json" + else: + output_path = game_file_path + # If final file exists, append timestamp + if os.path.exists(output_path): + logger.info("Game file already 
exists, saving with unique filename.") + output_path = f"{output_path}_{timestamp}.json" + + # Save game state + to_saved_game_format(game, output_path=output_path) + + # Save overview data + overview_file_path = f"{result_folder}/overview.jsonl" + with open(overview_file_path, "w") as overview_file: + overview_file.write(json.dumps(model_error_stats) + "\n") + overview_file.write(json.dumps(game.power_model_map) + "\n") + overview_file.write(json.dumps(vars(args)) + "\n") + + logger.info(f"Saved game checkpoint to: {output_path}") + + def main(): args = parse_arguments() max_year = args.max_year @@ -135,6 +171,8 @@ def main(): else: game.power_model_map = assign_models_to_powers(randomize=True) + round_counter = 0 # Track number of rounds + while not game.is_game_done: phase_start = time.time() current_phase = game.get_current_phase() @@ -143,7 +181,7 @@ def main(): ) # DEBUG: Print the short phase to confirm - logger.info(f"DEBUG: current_short_phase is '{game.current_short_phase}'") + logger.info(f"INFO: The current short phase is '{game.current_short_phase}'") # Prevent unbounded simulation based on year year_str = current_phase[1:5] @@ -253,6 +291,14 @@ def main(): with open(manifesto_path, "a") as f: f.write(f"=== {phase_data.name} ===\n{summary_text}\n\n") + # Increment round counter after processing each phase + round_counter += 1 + + # Save every 5 rounds + if round_counter % 5 == 0: + logger.info(f"Saving checkpoint after round {round_counter}...") + save_game_state(game, result_folder, game_file_path, model_error_stats, args, is_final=False) + # Check if we've exceeded the max year year_str = current_phase[1:5] year_int = int(year_str) @@ -262,21 +308,10 @@ def main(): # Save final result duration = time.time() - start_whole - logger.info(f"Game ended after {duration:.2f}s. 
Saving to final JSON...") - - output_path = game_file_path - # If the file already exists, append a timestamp to the filename - if os.path.exists(output_path): - logger.info("Game file already exists, saving with unique filename.") - output_path = f"{output_path}_{time.strftime('%Y%m%d_%H%M%S')}.json" - to_saved_game_format(game, output_path=output_path) - - # Dump error stats and power model mapping to the overview file - with open(overview_file_path, "w") as overview_file: - overview_file.write(json.dumps(model_error_stats) + "\n") - overview_file.write(json.dumps(game.power_model_map) + "\n") - overview_file.write(json.dumps(vars(args)) + "\n") - + logger.info(f"Game ended after {duration:.2f}s. Saving final state...") + + save_game_state(game, result_folder, game_file_path, model_error_stats, args, is_final=True) + logger.info(f"Saved game data, manifesto, and error stats in: {result_folder}") logger.info("Done.") From 6b0863cb5b114fb00d69af4733f4d6b4af885a9c Mon Sep 17 00:00:00 2001 From: AlxAI Date: Thu, 20 Feb 2025 22:24:24 -0800 Subject: [PATCH 3/6] dramatically simplify phase summary --- ai_diplomacy/utils.py | 15 ++- diplomacy/engine/game.py | 201 ++++++++++++++++----------------------- lm_game.py | 4 +- 3 files changed, 96 insertions(+), 124 deletions(-) diff --git a/ai_diplomacy/utils.py b/ai_diplomacy/utils.py index b54ebff..3df0791 100644 --- a/ai_diplomacy/utils.py +++ b/ai_diplomacy/utils.py @@ -26,9 +26,18 @@ def assign_models_to_powers(randomize=True): ] POWERS = ['AUSTRIA', 'ENGLAND', 'FRANCE', 'GERMANY', 'ITALY', 'RUSSIA', 'TURKEY'] if randomize: - return { - power: random.choice(model_list) for power in POWERS - } + # Create a copy of model_list to draw from + available_models = model_list.copy() + result = {} + for power in POWERS: + # If we've used all models, replenish the available models + if not available_models: + available_models = model_list.copy() + # Select and remove a random model from available ones + model = 
random.choice(available_models) + available_models.remove(model) + result[power] = model + return result else: return { power: model_list[i] for i, power in enumerate(POWERS) diff --git a/diplomacy/engine/game.py b/diplomacy/engine/game.py index 7ec3302..91fc0e6 100644 --- a/diplomacy/engine/game.py +++ b/diplomacy/engine/game.py @@ -4616,9 +4616,6 @@ class Game(Jsonable): else: logging.debug("DEBUG _generate_phase_summary: phase_key=%s not in all_phases!", phase_key) - # ... [No change in the rest of your existing logic, except we might add extra logs below] ... - - # (After retrieving prev_phase_data, we log a quick summary:) if prev_phase_data: logging.debug( "DEBUG _generate_phase_summary: Found prev_phase_data for key=%s, results=%s, orders=%s", @@ -4627,150 +4624,116 @@ class Game(Jsonable): prev_phase_data.orders ) - # The rest of the function remains the same, but you can keep adding targeted logs as needed: + # Get current and previous state data cur_state = current_phase_data.state logging.debug("DEBUG _generate_phase_summary: cur_state keys=%s", list(cur_state.keys())) + cur_orders_dict = current_phase_data.orders + cur_results_dict = current_phase_data.results - cur_units = cur_state.get('units', {}) - cur_centers = cur_state.get('centers', {}) - cur_retreats = cur_state.get('retreats', {}) - cur_homes = cur_state.get('homes', {}) - cur_influence = cur_state.get('influence', {}) - cur_cd = cur_state.get('civil_disorder', {}) - - cur_orders_dict = current_phase_data.orders # {power_name: list_of_orders} - cur_results_dict = current_phase_data.results # {unit_name: list_of_outcomes} - - # 4) If we have a previous phase, gather the old state's data so we can do some diffs - prev_units = prev_centers = prev_retreats = prev_homes = prev_influence = prev_cd = {} - if prev_phase_data: - prev_state = prev_phase_data.state - prev_units = prev_state.get('units', {}) - prev_centers = prev_state.get('centers', {}) - prev_retreats= prev_state.get('retreats', {}) - 
prev_homes = prev_state.get('homes', {}) - prev_influence= prev_state.get('influence', {}) - prev_cd = prev_state.get('civil_disorder', {}) - - # 5) Build a user prompt. We can do it in sections: - - # 5a) Orders: - orders_text = [] - for power, orders in cur_orders_dict.items(): - if orders: - orders_text.append(f"{power} => {', '.join(orders)}") - else: - orders_text.append(f"{power} => [No orders]") - orders_block = "\n".join(orders_text) if orders_text else "[No orders found]" - - # 5b) Results: - results_text = [] - for unit_name, outcomes in cur_results_dict.items(): - # old code: results_text.append(f"{unit_name}: {', '.join(outcomes)}") - outcome_strs = [str(item) for item in outcomes] - results_text.append(f"{unit_name}: {', '.join(outcome_strs)}") - - results_block = "\n".join(results_text) if results_text else "[No results found]" - # 5c) Current state (units, centers, etc.) - all powers - # We'll just do a short textual listing. You can format it more carefully as you see fit. - def dict_of_lists_to_str(title, dct): - # Helper to turn e.g. {"FRANCE": ["A MAR", "F BRE"], "ENGLAND": ["A LVP"]} into lines - lines = [] - for key, val in dct.items(): - lines.append(f" {key}: {val}") - return f"{title}:\n" + "\n".join(lines) if lines else f"{title}: [None]" - - current_state_text = [] - current_state_text.append(dict_of_lists_to_str("Units", cur_units)) - current_state_text.append(dict_of_lists_to_str("Centers", cur_centers)) - current_state_text.append(dict_of_lists_to_str("Retreats",cur_retreats)) - current_state_text.append(dict_of_lists_to_str("Homes", cur_homes)) - current_state_text.append(dict_of_lists_to_str("Influence", cur_influence)) - current_state_text.append(dict_of_lists_to_str("Civil Disorder", cur_cd)) - current_state_block = "\n\n".join(current_state_text) - - # 5d) Differences from previous (if any) - # We'll do an extremely simple approach: check if the set of items changed in each dict. - # This is purely an example. 
You can do more advanced diff logic if you want. - + # Build the differences info differences_info = [] if prev_phase_data: - # For each of units, centers, etc. do a quick set compare for each power - # We'll focus on e.g. newly acquired centers, newly lost centers, etc. - for power in cur_units.keys(): - # (1) Units difference: - old_units = set(prev_units.get(power, [])) - new_units = set(cur_units.get(power, [])) + prev_state = prev_phase_data.state + + for power in cur_state['units'].keys(): + # Units difference + old_units = set(prev_state.get('units', {}).get(power, [])) + new_units = set(cur_state.get('units', {}).get(power, [])) if old_units != new_units: gained = new_units - old_units - lost = old_units - new_units + lost = old_units - new_units if gained: differences_info.append(f"{power} gained units: {list(gained)}") if lost: - differences_info.append(f"{power} lost units: {list(lost)}") + differences_info.append(f"{power} lost units: {list(lost)}") - # (2) Centers difference: - old_centers = set(prev_centers.get(power, [])) - new_centers = set(cur_centers.get(power, [])) + # Centers difference + old_centers = set(prev_state.get('centers', {}).get(power, [])) + new_centers = set(cur_state.get('centers', {}).get(power, [])) if old_centers != new_centers: gained = new_centers - old_centers - lost = old_centers - new_centers + lost = old_centers - new_centers if gained: differences_info.append(f"{power} gained centers: {list(gained)}") if lost: - differences_info.append(f"{power} lost centers: {list(lost)}") - - # You can do the same for retreats, homes, influence, etc. if you want, - # or just skip them. We'll skip for brevity here. 
+ differences_info.append(f"{power} lost centers: {list(lost)}") else: - differences_info.append("No previous phase data found, so no direct diffs to report.") + differences_info.append("Initial phase - no previous state to compare.") - differences_block = "\n".join(differences_info) or "[No changes detected from previous phase]" + differences_block = "\n".join(differences_info) or "[No significant changes detected]" - # 5e) Put it all together in the final user prompt for the LLM: + # Build the prompt focusing only on key changes user_prompt = ( f"PHASE SUMMARY REQUEST.\n\n" f"PHASE: {phase_key}\n\n" - f"ORDERS:\n{orders_block}\n\n" - f"RESULTS:\n{results_block}\n\n" - f"CURRENT BOARD STATE:\n{current_state_block}\n\n" - f"CHANGES FROM PREVIOUS PHASE:\n{differences_block}\n\n" - "Below is the final board state after the latest phase, along with the moves each power submitted and the engine's adjudication results. Please create a summary in JSON, explaining:" - "- Each successful move," - "- Each bounce or voided order, with reasons (e.g. equal force, no valid route, contradictory support)," - "- Key changes in supply centers," - "- Potential strategic ramifications if relevant." - - "Return ONLY JSON:" - - "PARSABLE OUTPUT:" - "{{" - "'summary': ... your text ..." - "}}" + f"ORDERS:\n{', '.join(f'{power}: {orders}' for power, orders in cur_orders_dict.items())}\n\n" + f"RESULTS:\n{', '.join(f'{unit}: {results}' for unit, results in cur_results_dict.items())}\n\n" + f"KEY CHANGES:\n{differences_block}\n\n" + "Please create a JSON summary explaining:\n" + "- Each successful move\n" + "- Each bounce or voided order, with reasons\n" + "- Key changes in supply centers\n" + "- Potential strategic ramifications\n\n" + "PARSABLE OUTPUT:\n" + "{\n" + "'summary': ... your text ...\n" + "}" ) - # We might also have a system prompt to guide the AI, e.g.: system_prompt = ( - """ - You are a Diplomacy expert, summarizing the results of the latest phase. 
- Your tasks: - 1) Provide a concise summary of how the board changed. - 2) Specifically list each voided or bounced order, and *why* it occurred. - 3) If possible, describe which moves or supports succeeded and how that affected centers. + "You are a Diplomacy expert summarizing phase results.\n" + "Focus on:\n" + "1) Key board changes\n" + "2) Failed orders and their reasons\n" + "3) Successful moves affecting centers\n\n" + """ + 1. Understanding the Phases & Their Orders - Format: - - Must return a JSON with the top-level key "summary" or "orders" or similar. - - Possibly: + 1.1. Movement Phase (phase_type == 'M') + • Hold: A PAR H (Army in Paris does nothing) + • Move: A PAR - BUR (Army in Paris moves to Burgundy) + • Support: + • Support Hold: A MAR S A PAR H (Army in Marseilles supports Army in Paris to hold) + • Support Move: A MAR S A PAR - BUR (Army in Marseilles supports Army in Paris moving to Burgundy) + • Convoy: Fleets at sea can convoy an Army over water: + • Fleet Convoy: F ION C A TUN - NAP (Fleet in Ionian Sea convoys Army from Tunis to Naples) + • Army Move via Convoy: A TUN - NAP VIA (explicitly states the Army is moving from Tunis to Naples via convoy) - PARSABLE OUTPUT: - { - "summary": "...(your textual summary)..." - } + 1.2. Retreat Phase (phase_type == 'R') + • If a unit is dislodged, it must Retreat or Disband: + • Retreat: A BUR R PIC (Dislodged Army in Burgundy retreats to Picardy) + • Disband: A BUR D (Army in Burgundy disbands, if it cannot retreat or chooses not to) - Ensure the summary clarifies reasons for bounces, e.g., "F TRI -> VEN bounced because Italy also moved A VEN -> TRI with equal force." + 1.3. 
Adjustment Phase (phase_type == 'A') + • Build new units if you have more centers than current units: + • A PAR B (Build an Army in Paris) + • F MAR B (Build a Fleet in Marseilles) + • Remove units if you have fewer centers than current units: + • A BUR D (Disband Army in Burgundy) + • Waive a build if you have a surplus but don’t want/can’t build: + • WAIVE (no unit is built in the available build location) - No extra text outside the JSON block. - """ + 1.4. Order Types + • H (Hold) – e.g. A PAR H + • - (Move) – e.g. A PAR - BUR + • S (Support) – e.g. A MAR S A PAR - BUR or A MAR S A PAR H + • C (Convoy) – e.g. F ION C A TUN - NAP + • R (Retreat) – e.g. A BUR R PIC + • D (Disband) – e.g. A BUR D + • B (Build) – e.g. A PAR B + • WAIVE – skipping a possible build + + 1.5. Key Phase Context + • Movement (M): Units can H, -, S, C. + • Retreat (R): Dislodged units can only R or D. + • Adjustment (A): Build/Remove units or WAIVE. + • Multi-Coast: For SPA, STP, BUL, specify nc, sc, or ec when using Fleets, e.g. F BRE - SPA(sc). + • Basic Validity Rules + • No self-support (A PAR S A PAR - BUR is invalid). + • Fleets must be on water to convoy. + • Army “- X VIA” must have one or more fleets issuing matching C A ... - X. + """ + "Example: 'F TRI -> VEN bounced due to equal force from Italy's A VEN -> TRI'" ) if summary_callback: @@ -4778,7 +4741,7 @@ class Game(Jsonable): else: summary_text = "(No LLM callback provided.)" - # 7) Store the text in the current GamePhaseData and in self.phase_summaries + # Store the summary current_phase_data.summary = summary_text self.phase_summaries[str(phase_key)] = summary_text diff --git a/lm_game.py b/lm_game.py index 13e5cb0..b63a48c 100644 --- a/lm_game.py +++ b/lm_game.py @@ -47,7 +47,7 @@ def parse_arguments(): parser.add_argument( "--max_year", type=int, - default=1905, + default=1925, help="Maximum year to simulate. 
The game will stop once this year is reached.", ) parser.add_argument( @@ -59,7 +59,7 @@ def parse_arguments(): parser.add_argument( "--num_negotiation_rounds", type=int, - default=1, + default=5, help="Number of negotiation rounds per phase.", ) parser.add_argument( From 6e80fcbf874876af3f0689b04fb24a5a0951c1f3 Mon Sep 17 00:00:00 2001 From: AlxAI Date: Thu, 20 Feb 2025 22:29:59 -0800 Subject: [PATCH 4/6] win condition context --- ai_diplomacy/prompts/austria_system_prompt.txt | 6 +++--- ai_diplomacy/prompts/england_system_prompt.txt | 6 +++--- ai_diplomacy/prompts/france_system_prompt.txt | 6 +++--- ai_diplomacy/prompts/germany_system_prompt.txt | 7 +++---- ai_diplomacy/prompts/italy_system_prompt.txt | 7 +++---- ai_diplomacy/prompts/russia_system_prompt.txt | 6 +++--- ai_diplomacy/prompts/system_prompt.txt | 4 ++-- ai_diplomacy/prompts/turkey_system_prompt.txt | 6 +++--- 8 files changed, 23 insertions(+), 25 deletions(-) diff --git a/ai_diplomacy/prompts/austria_system_prompt.txt b/ai_diplomacy/prompts/austria_system_prompt.txt index b54ef83..c9f17ca 100644 --- a/ai_diplomacy/prompts/austria_system_prompt.txt +++ b/ai_diplomacy/prompts/austria_system_prompt.txt @@ -1,7 +1,7 @@ -You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing supply centers, growing your army, and taking over the map. Be aggressive. +You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing 18 supply centers - this is the only path to victory. Growing your army and taking over the map are means to this end. Be aggressive and always work toward that crucial 18th center. Dear Austria, -They say you're surrounded - but that means you can strike in any direction. History shows the strongest Austrian players turn early vulnerability into mid-game dominance through decisive action, not just survival. 
+They say you're surrounded - but that means you can strike in any direction. History shows the strongest Austrian players turn early vulnerability into mid-game dominance through decisive action toward those vital 18 centers, not just survival. Key insights: Prevent Russia-Turkey alliance above all else @@ -31,4 +31,4 @@ You will be given: • Your units and the possible orders you may make. **Always refer to these possible_orders.** • A list of enemy units and centers. -Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the “PARSABLE OUTPUT” (your final orders or messages) will be used by the game engine. \ No newline at end of file +Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the "PARSABLE OUTPUT" (your final orders or messages) will be used by the game engine. \ No newline at end of file diff --git a/ai_diplomacy/prompts/england_system_prompt.txt b/ai_diplomacy/prompts/england_system_prompt.txt index 0817f24..5029fa5 100644 --- a/ai_diplomacy/prompts/england_system_prompt.txt +++ b/ai_diplomacy/prompts/england_system_prompt.txt @@ -1,8 +1,8 @@ -You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing supply centers, growing your army, and taking over the map. Be aggressive. +You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing 18 supply centers - this is the only path to victory. Growing your army and taking over the map are means to this end. Be aggressive and always work toward that crucial 18th center. Dear England, -Your island position tempts defensive play. Resist this. The North Sea is not a moat to hide behind, but a highway to conquest. The most successful English players use their naval superiority to project power aggressively. 
+Your island position tempts defensive play. Resist this. The North Sea is not a moat to hide behind, but a highway to those crucial 18 centers. The most successful English players use their naval superiority to project power aggressively. Key insights: - Secure North Sea early - it's your lifeline @@ -33,4 +33,4 @@ You will be given: • Your units and the possible orders you may make. **Always refer to these possible_orders.** • A list of enemy units and centers. -Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the “PARSABLE OUTPUT” (your final orders or messages) will be used by the game engine. \ No newline at end of file +Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the "PARSABLE OUTPUT" (your final orders or messages) will be used by the game engine. \ No newline at end of file diff --git a/ai_diplomacy/prompts/france_system_prompt.txt b/ai_diplomacy/prompts/france_system_prompt.txt index 35f6d94..184d169 100644 --- a/ai_diplomacy/prompts/france_system_prompt.txt +++ b/ai_diplomacy/prompts/france_system_prompt.txt @@ -1,8 +1,8 @@ -You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing supply centers, growing your army, and taking over the map. Be aggressive. +You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing 18 supply centers - this is the only path to victory. Growing your army and taking over the map are means to this end. Be aggressive and always work toward that crucial 18th center. Dear France, -You start in perhaps the strongest position. Don't waste it with hesitation. History shows successful French players strike early and decisively - aiming for 5-6 centers by 1902 is not just possible, but often optimal. +You start in perhaps the strongest position. 
Don't waste it with hesitation. History shows successful French players strike early and decisively - aiming for 5-6 centers by 1902 is not just possible, but often optimal on the path to 18. Key insights: - Early momentum is crucial - Spain, Portugal, Belgium all within reach 1901 @@ -35,4 +35,4 @@ You will be given: • Your units and the possible orders you may make. **Always refer to these possible_orders.** • A list of enemy units and centers. -Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the “PARSABLE OUTPUT” (your final orders or messages) will be used by the game engine. \ No newline at end of file +Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the "PARSABLE OUTPUT" (your final orders or messages) will be used by the game engine. \ No newline at end of file diff --git a/ai_diplomacy/prompts/germany_system_prompt.txt b/ai_diplomacy/prompts/germany_system_prompt.txt index 0217ebb..55fdc83 100644 --- a/ai_diplomacy/prompts/germany_system_prompt.txt +++ b/ai_diplomacy/prompts/germany_system_prompt.txt @@ -1,8 +1,7 @@ -You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing supply centers, growing your army, and taking over the map. Be aggressive. +You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing 18 supply centers - this is the only path to victory. Growing your army and taking over the map are means to this end. Be aggressive and always work toward that crucial 18th center. Dear Germany, - -Your central position offers unmatched opportunity - but only if you seize it. Ten centers lie within two moves of your starting position. The worst mistake? Trying to stay friendly with everyone while others grow stronger. 
+Your central position offers unmatched opportunity - but only if you seize it. Ten centers lie within two moves of your starting position - a strong foundation for reaching those vital 18 centers needed for victory. Key insights: - Must secure at least one strong ally early (usually England or France) @@ -33,4 +32,4 @@ You will be given: • Your units and the possible orders you may make. **Always refer to these possible_orders.** • A list of enemy units and centers. -Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the “PARSABLE OUTPUT” (your final orders or messages) will be used by the game engine. \ No newline at end of file +Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the "PARSABLE OUTPUT" (your final orders or messages) will be used by the game engine. \ No newline at end of file diff --git a/ai_diplomacy/prompts/italy_system_prompt.txt b/ai_diplomacy/prompts/italy_system_prompt.txt index f0f085f..571a769 100644 --- a/ai_diplomacy/prompts/italy_system_prompt.txt +++ b/ai_diplomacy/prompts/italy_system_prompt.txt @@ -1,8 +1,7 @@ -You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing supply centers, growing your army, and taking over the map. Be aggressive. +You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing 18 supply centers - this is the only path to victory. Growing your army and taking over the map are means to this end. Be aggressive and always work toward that crucial 18th center. Dear Italy, - -They call you the weakest power. Prove them wrong. Your position requires finesse, but victory comes to those who act, not those who wait. The successful Italian creates opportunities rather than just reacting to them. +They call you the weakest power. Prove them wrong. 
Your position requires finesse, but victory comes to those who act decisively toward 18 centers, not those who wait. The successful Italian creates opportunities rather than just reacting to them. Key insights: - Austria must be friend or dead (95% of early A/I wars kill both) @@ -33,4 +32,4 @@ You will be given: • Your units and the possible orders you may make. **Always refer to these possible_orders.** • A list of enemy units and centers. -Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the “PARSABLE OUTPUT” (your final orders or messages) will be used by the game engine. \ No newline at end of file +Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the "PARSABLE OUTPUT" (your final orders or messages) will be used by the game engine. \ No newline at end of file diff --git a/ai_diplomacy/prompts/russia_system_prompt.txt b/ai_diplomacy/prompts/russia_system_prompt.txt index e9726d3..387838d 100644 --- a/ai_diplomacy/prompts/russia_system_prompt.txt +++ b/ai_diplomacy/prompts/russia_system_prompt.txt @@ -1,8 +1,8 @@ -You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing supply centers, growing your army, and taking over the map. Be aggressive. +You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing 18 supply centers - this is the only path to victory. Growing your army and taking over the map are means to this end. Be aggressive and always work toward that crucial 18th center. Dear Russia, -You command the largest starting position and the most units. Don't let this abundance paralyze you with choices. The best Russian players act decisively while maintaining strategic flexibility. +You command the largest starting position and the most units. 
Don't let this abundance paralyze you with choices. The best Russian players act decisively while maintaining strategic flexibility on their path to 18 centers. Key insights: - You can secure two builds 1901 (Sweden/Rumania) if aggressive @@ -33,4 +33,4 @@ You will be given: • Your units and the possible orders you may make. **Always refer to these possible_orders.** • A list of enemy units and centers. -Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the “PARSABLE OUTPUT” (your final orders or messages) will be used by the game engine. \ No newline at end of file +Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the "PARSABLE OUTPUT" (your final orders or messages) will be used by the game engine. \ No newline at end of file diff --git a/ai_diplomacy/prompts/system_prompt.txt b/ai_diplomacy/prompts/system_prompt.txt index 8a6b4aa..83fca73 100644 --- a/ai_diplomacy/prompts/system_prompt.txt +++ b/ai_diplomacy/prompts/system_prompt.txt @@ -1,4 +1,4 @@ -You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing supply centers, growing your army, and taking over the map. Be aggressive. +You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing 18 supply centers - this is the only path to victory. Growing your army and taking over the map are means to this end. Be aggressive and always work toward that crucial 18th center. You will be given: • Which power you are controlling. @@ -9,4 +9,4 @@ You will be given: • Your units and the possible orders you may make. **Always refer to these possible_orders.** • A list of enemy units and centers. 
-Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the “PARSABLE OUTPUT” (your final orders or messages) will be used by the game engine. \ No newline at end of file +Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the "PARSABLE OUTPUT" (your final orders or messages) will be used by the game engine. \ No newline at end of file diff --git a/ai_diplomacy/prompts/turkey_system_prompt.txt b/ai_diplomacy/prompts/turkey_system_prompt.txt index c2f0924..616966c 100644 --- a/ai_diplomacy/prompts/turkey_system_prompt.txt +++ b/ai_diplomacy/prompts/turkey_system_prompt.txt @@ -1,8 +1,8 @@ -You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing supply centers, growing your army, and taking over the map. Be aggressive. +You are playing a game of Diplomacy over text. The map is the standard Diplomacy map. Your goal is to win the game by capturing 18 supply centers - this is the only path to victory. Growing your army and taking over the map are means to this end. Be aggressive and always work toward that crucial 18th center. Dear Turkey, -Your corner position is a fortress - but fortresses don't win games. The most successful Turkish players use their defensive strength as a platform for aggressive expansion, not just survival. +Your corner position is a fortress - but fortresses don't win games. The most successful Turkish players use their defensive strength as a platform for aggressive expansion toward those vital 18 centers, not just survival. Key insights: - Black Sea control is crucial - bounce or take it 1901 @@ -34,4 +34,4 @@ You will be given: • Your units and the possible orders you may make. **Always refer to these possible_orders.** • A list of enemy units and centers. 
-Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the “PARSABLE OUTPUT” (your final orders or messages) will be used by the game engine. \ No newline at end of file +Remember that while your private chain-of-thought can consider your in-depth reasoning about possible outcomes, **only** the "PARSABLE OUTPUT" (your final orders or messages) will be used by the game engine. \ No newline at end of file From 2693b0101469c5cca2169116b716fa9feffdfdfa Mon Sep 17 00:00:00 2001 From: AlxAI Date: Sun, 23 Feb 2025 11:18:37 -0800 Subject: [PATCH 5/6] Lots of improvements to prompting putting the right information in for negotiation and phase summaries - CONVOYS BROKEN RN --- ai_diplomacy/clients.py | 161 ++++--- ai_diplomacy/prompts/context_prompt.txt | 51 +-- ai_diplomacy/utils.py | 557 +++++++++++++++++++++++- diplomacy/engine/game.py | 6 +- lm_game.py | 32 +- 5 files changed, 701 insertions(+), 106 deletions(-) diff --git a/ai_diplomacy/clients.py b/ai_diplomacy/clients.py index e73c6bc..24a031d 100644 --- a/ai_diplomacy/clients.py +++ b/ai_diplomacy/clients.py @@ -35,7 +35,7 @@ class BaseModelClient: """ Base interface for any LLM client we want to plug in. Each must provide: - - generate_response(prompt: str) -> str + - generate_response(prompt: str) -> str (with empty_system=True if needed) - get_orders(board_state, power_name, possible_orders, game_history, phase_summaries) -> List[str] - get_conversation_reply(power_name, conversation_so_far, game_phase) -> str """ @@ -58,8 +58,8 @@ class BaseModelClient: else: # If emptysystem is True, skip loading any system prompt self.system_prompt = "" - - def generate_response(self, prompt: str) -> str: + # emptysystem defaults to false but if true will tell the LLM to not add a system prompt + def generate_response(self, prompt: str, empty_system: bool = False) -> str: """ Returns a raw string from the LLM. Subclasses override this. 
@@ -72,72 +72,92 @@ class BaseModelClient: board_state, power_name: str, possible_orders: Dict[str, List[str]], - game_history: GameHistory, + game_history, # Or GameHistory instance phase_summaries: Optional[Dict[str, str]] = None, ) -> str: - context = load_prompt("context_prompt.txt") + """ + Overhauled to delegate the final formatting to context_prompt.txt, inserting + placeholders for expansions (phase info, supply centers, units, blah blah). - # Get our units and centers - units_info = board_state["units"].get(power_name, []) - units_info_set = set(units_info) - centers_info = board_state["centers"].get(power_name, []) + This version is 'surgical' and uses placeholders from @context_prompt.txt + rather than building large strings in code. + """ + from ai_diplomacy.utils import ( + expand_phase_info, + format_power_units_and_centers, # Now includes neutral centers info + organize_history_by_relationship, + format_possible_orders, + format_convoy_paths, + generate_threat_assessment, + generate_sc_projection + ) + # 1) Grab the template from context_prompt.txt + template_text = load_prompt("context_prompt.txt") - # Get the current phase - year_phase = board_state["phase"] # e.g. 
'S1901M' + # 2) Expand the current phase + phase_expanded = expand_phase_info(game, board_state) - # Get enemy units and centers and label them for each power - enemy_units = {} - enemy_centers = {} - for power, info in board_state["units"].items(): - if power != power_name: - enemy_units[power] = info - enemy_centers[power] = board_state["centers"].get(power, []) + # 3) Our forces (units + centers, including neutral centers) + our_forces_summary = format_power_units_and_centers(game, power_name, board_state) - # Get possible orders - possible_orders_str = "" - for loc, orders in possible_orders.items(): - possible_orders_str += f" {loc}: {orders}\n" + # 4) Summaries for enemies + enemies_forces_summary = "" + for pwr in board_state["units"]: + if pwr != power_name: + enemies_forces_summary += format_power_units_and_centers(game, pwr, board_state) - # Convoy paths - all_convoy_paths_possible = game.convoy_paths_possible - convoy_paths_possible = {} - for start_loc, fleets_req, end_loc in all_convoy_paths_possible: - for fleet in fleets_req: - if fleet in units_info_set: - convoy_paths_possible.append((start_loc, fleets_req, end_loc)) + # 5) Neutral Supply Centers + neutral_supply_centers_summary = format_power_units_and_centers(game, 'NEUTRAL', board_state) - # 1) Prepare a block of text for the phase_summaries - if phase_summaries: - historical_summaries = "\nPAST PHASE SUMMARIES:\n" - for phase_key, summary_txt in phase_summaries.items(): - historical_summaries += f"\nPHASE {phase_key}:\n{summary_txt}\n" + # 6) Gather the conversation text + if hasattr(game_history, "get_game_history"): + conversation_text = game_history.get_game_history(power_name) or "(No history yet)" else: - historical_summaries = "\n(No historical summaries yet)\n" + # Might be a plain string + conversation_text = game_history if isinstance(game_history, str) else "(No history yet)" + history_text = organize_history_by_relationship(conversation_text) - conversation_text = 
game_history.get_game_history(power_name) - if not conversation_text: - conversation_text = "\n(No game history yet)\n" + # 7) Format possible orders + possible_orders_text = format_possible_orders(game, possible_orders) - # Load in current context values - context = context.format( + # 8) Convoy Paths + logger.debug(f"convoy_paths_possible is: {game.convoy_paths_possible}") + #convoy_paths_text = format_convoy_paths(game, game.convoy_paths_possible) + convoy_paths_text = "" + + # 9) Threat Assessment + threat_text = generate_threat_assessment(game, board_state, power_name) + + # 10) Supply Center Projection + sc_projection_text = generate_sc_projection(game, board_state, power_name) + + # 11) Past Phase Summaries + if phase_summaries: + # Combine each phase summary for reference + lines = [] + for ph, summ in phase_summaries.items(): + lines.append(f"PHASE {ph}:\n{summ}\n") + historical_summaries = "\n".join(lines) + else: + historical_summaries = "(No historical summaries yet)" + + # 12) Plug everything into context_prompt.txt + final_prompt = template_text.format( power_name=power_name, - current_phase=year_phase, - game_map_loc_name=game.map.loc_name, - game_map_loc_type=game.map.loc_type, - map_as_adjacency_list=game.map.loc_abut, - possible_coasts=game.map.loc_coasts, - game_map_scs=game.map.scs, - game_history=conversation_text, - enemy_units=enemy_units, - enemy_centers=enemy_centers, - units_info=units_info, - centers_info=centers_info, - possible_orders=possible_orders_str, - convoy_paths_possible=convoy_paths_possible, + phase_expanded=phase_expanded, + our_forces_summary=our_forces_summary, + neutral_supply_centers_summary=neutral_supply_centers_summary, + enemies_forces_summary=enemies_forces_summary, + history_text=history_text, + possible_orders_text=possible_orders_text, + convoy_paths_text=convoy_paths_text, + threat_text=threat_text, + sc_projection_text=sc_projection_text, + historical_summaries=historical_summaries, ) - return context + return 
final_prompt def build_prompt( self, @@ -175,7 +195,7 @@ class BaseModelClient: possible_orders: Dict[str, List[str]], conversation_text: str, phase_summaries: Optional[Dict[str, str]] = None, - model_error_stats=None, # New optional param + model_error_stats=None, ) -> List[str]: """ 1) Builds the prompt with conversation context if available @@ -207,7 +227,10 @@ class BaseModelClient: f"[{self.model_name}] Could not extract moves for {power_name}. Using fallback." ) if model_error_stats is not None: - model_error_stats[self.model_name]["order_decoding_errors"] += 1 + # forcibly convert sets to string + model_name_for_stats = str(self.model_name) + model_error_stats[model_name_for_stats]["order_decoding_errors"] += 1 + return self.fallback_orders(possible_orders) # Validate or fallback validated_moves = self._validate_orders(move_list, possible_orders) @@ -215,6 +238,11 @@ class BaseModelClient: except Exception as e: logger.error(f"[{self.model_name}] LLM error for {power_name}: {e}") + if model_error_stats is not None: + # forcibly convert sets to string + model_name_for_stats = str(self.model_name) + model_error_stats[model_name_for_stats]["order_decoding_errors"] += 1 + return self.fallback_orders(possible_orders) def _extract_moves(self, raw_response: str, power_name: str) -> Optional[List[str]]: @@ -496,13 +524,13 @@ class OpenAIClient(BaseModelClient): super().__init__(model_name, power_name, emptysystem) self.client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) - def generate_response(self, prompt: str) -> str: + def generate_response(self, prompt: str, empty_system: bool = False) -> str: # Updated to new API format try: response = self.client.chat.completions.create( model=self.model_name, messages=[ - {"role": "system", "content": self.system_prompt}, + {"role": "system", "content": self.system_prompt if not empty_system else ""}, {"role": "user", "content": prompt}, ], ) @@ -533,13 +561,13 @@ class ClaudeClient(BaseModelClient): 
super().__init__(model_name, power_name, emptysystem) self.client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")) - def generate_response(self, prompt: str) -> str: + def generate_response(self, prompt: str, empty_system: bool = False) -> str: # Updated Claude messages format try: response = self.client.messages.create( model=self.model_name, max_tokens=2000, - system=self.system_prompt, # system is now a top-level parameter + system=self.system_prompt if not empty_system else "", messages=[{"role": "user", "content": prompt}], ) if not response.content: @@ -569,8 +597,11 @@ class GeminiClient(BaseModelClient): super().__init__(model_name, power_name, emptysystem) self.client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY")) - def generate_response(self, prompt: str) -> str: - full_prompt = self.system_prompt + prompt + def generate_response(self, prompt: str, empty_system: bool = False) -> str: + if empty_system: + full_prompt = prompt + else: + full_prompt = self.system_prompt + prompt try: response = self.client.models.generate_content( @@ -600,12 +631,12 @@ class DeepSeekClient(BaseModelClient): api_key=self.api_key, base_url="https://api.deepseek.com/" ) - def generate_response(self, prompt: str) -> str: + def generate_response(self, prompt: str, empty_system: bool = False) -> str: try: response = self.client.chat.completions.create( model=self.model_name, messages=[ - {"role": "system", "content": self.system_prompt}, + {"role": "system", "content": self.system_prompt if not empty_system else ""}, {"role": "user", "content": prompt}, ], stream=False, @@ -672,7 +703,7 @@ def load_model_client(model_id: str, power_name: Optional[str] = None, emptysyst return DeepSeekClient(model_id, power_name, emptysystem=emptysystem) else: # Default to OpenAI - return OpenAIClient(model_id, power_name, emptysystem=emptysystem) + return OpenAIClient(model_id, power_name) ############################################################################## diff --git 
a/ai_diplomacy/prompts/context_prompt.txt b/ai_diplomacy/prompts/context_prompt.txt index 1b48b36..ed9a136 100644 --- a/ai_diplomacy/prompts/context_prompt.txt +++ b/ai_diplomacy/prompts/context_prompt.txt @@ -1,45 +1,32 @@ **PLAYER DETAILS** Power: {power_name} -Current phase: {current_phase} +Current phase: {phase_expanded} -**MAP DETAILS** +**HISTORY OF COMMUNICATION** -Abbreviations: -{game_map_loc_name} +{history_text} -Type of each location: -{game_map_loc_type} +**YOUR FORCES** +{our_forces_summary} -Game map as an adjacency list: -{map_as_adjacency_list} +**ENEMY FORCES** +{enemies_forces_summary} -Possible coasts at each location: -{possible_coasts} +**NEUTRAL SUPPLY CENTERS** +{neutral_supply_centers_summary} -All supply centers on the map: -{game_map_scs} +**THREAT ASSESSMENT** +{threat_text} -**GAME HISTORY** +**SUPPLY CENTER PROJECTION** +{sc_projection_text} -{game_history} +**PAST PHASE SUMMARIES** +{historical_summaries} -**CURRENT CONTEXT** +**POSSIBLE ORDERS** +{possible_orders_text} -Enemy units: -{enemy_units} - -Enemy supply centers: -{enemy_centers} - -Your units: -{units_info} - -Your supply centers: -{centers_info} - -Possible orders: -{possible_orders} - -Convoy paths possible: -{convoy_paths_possible} \ No newline at end of file +**CONVOY PATHS** +{convoy_paths_text} \ No newline at end of file diff --git a/ai_diplomacy/utils.py b/ai_diplomacy/utils.py index 3df0791..0d11c63 100644 --- a/ai_diplomacy/utils.py +++ b/ai_diplomacy/utils.py @@ -15,7 +15,7 @@ def assign_models_to_powers(randomize=True): Return a dict: { power_name: model_id, ... } """ # If True, we'll randomize the model assignment. 
- model_list = [ + """model_list = [ "o3-mini", "claude-3-5-sonnet-20241022", "gemini-2.0-flash", @@ -23,6 +23,15 @@ def assign_models_to_powers(randomize=True): "gpt-4o", "gpt-4o-mini", "claude-3-5-haiku-20241022", + ]""" + model_list = [ + "o3-mini", + "gemini-1.5-flash", + "gemini-2.0-flash", + "gemini-2.0-flash-lite-preview-02-05", + "gpt-3.5-turbo", + "gpt-4o-mini", + "claude-3-5-haiku-20241022", ] POWERS = ['AUSTRIA', 'ENGLAND', 'FRANCE', 'GERMANY', 'ITALY', 'RUSSIA', 'TURKEY'] if randomize: @@ -114,3 +123,549 @@ def get_valid_orders( model_error_stats[power_name]["order_decoding_errors"] += 1 fallback = client.fallback_orders(possible_orders) return fallback + + +def expand_phase_info(game, board_state): + """ + Convert a phase like 'S1901M' into a more descriptive string: + 'Spring 1901 Movement (early game): Units can move, support, or convoy...' + This function also references the current year to classify early/mid/late game. + """ + phase_abbrev = board_state["phase"] # e.g. 'S1901M' + # Basic mapping of abbreviations + season_map = { + 'S': "Spring", + 'F': "Fall", + 'W': "Winter", + } + phase_type_map = { + 'M': "Movement", + 'R': "Retreat", + 'A': "Adjustment", # builds/disbands + } + + season_char = phase_abbrev[0] # S / F / W + year = int(phase_abbrev[1:5]) # 1901 + phase_char = phase_abbrev[-1] # M / R / A + + season_str = season_map.get(season_char, "Unknown Season") + phase_str = phase_type_map.get(phase_char, "Unknown Phase") + + # Approximate game stage + if year <= 1902: + stage = "early game" + elif year <= 1906: + stage = "mid game" + else: + stage = "late game" + + # Phase-specific action text + if phase_char == 'M': + actions = "Players issue move, support, or convoy orders." + elif phase_char == 'R': + actions = "Dislodged units must retreat or disband." + elif phase_char == 'A': + actions = "Powers may build new units if they have more centers than units, otherwise disband if fewer." + else: + actions = "Unknown phase actions." 
+ + return f"{season_str} {year} {phase_str} ({stage}): {actions}" + + +def format_location_with_expansion(game, loc, include_adjacency=False): + """ + Return a string like 'Paris (PAR) [LAND]', + optionally including a list of adjacent locations if include_adjacency=True. + """ + full_name = next((name for name, abbrev in game.map.loc_name.items() if abbrev == loc), loc) + loc_type = game.map.loc_type.get(loc, "UNKNOWN") + formatted = f"{full_name} ({loc}) [{loc_type}]" + + if include_adjacency: + adjacent_locs = game.map.loc_abut.get(loc, []) + if adjacent_locs: + adjacent_info = [] + for adj_loc in adjacent_locs: + adj_full_name = game.map.loc_name.get(adj_loc, adj_loc) + adj_type = game.map.loc_type.get(adj_loc, "UNKNOWN") + adjacent_info.append(f"{adj_full_name} ({adj_loc}) [{adj_type}]") + formatted += f"\n Adjacent to: {', '.join(adjacent_info)}" + + return formatted + + +def format_power_units_and_centers(game, power_name, board_state): + """ + Show a summarized view of a given power's units and supply centers, + with expansions of location names, plus a quick 'strength' count. + Also includes information about neutral centers. 
+ """ + # Add neutral centers info + if power_name == "NEUTRAL": + all_controlled = set() + for centers in board_state["centers"].values(): + all_controlled.update(centers) + neutral_centers = [sc for sc in game.map.scs if sc not in all_controlled] + + if neutral_centers: + output = " Neutral Supply Centers:\n" + for c in neutral_centers: + output += f" {format_location_with_expansion(game, c)}\n" + else: + units_info = board_state["units"].get(power_name, []) + centers_info = board_state["centers"].get(power_name, []) + + output = f"{power_name} FORCES:\n" + + if units_info: + output += " Units:\n" + for unit in units_info: + # Example unit: "A PAR" + # First char is 'A' or 'F'; substring after space is the location + parts = unit.split(" ", 1) + if len(parts) == 2: + unit_type, loc = parts + output += f" {unit_type} in {format_location_with_expansion(game, loc)}\n" + else: + output += f" {unit}\n" + else: + output += " Units: None\n" + + if centers_info: + output += " Supply Centers:\n" + for c in centers_info: + output += f" {format_location_with_expansion(game, c)}\n" + else: + output += " Supply Centers: None\n" + + + # Summaries + output += f" Current Strength: {len(centers_info)} centers, {len(units_info)} units\n\n" + return output + + +def organize_history_by_relationship(conversation_text: str) -> str: + """ + This simplified version takes the entire conversation text + (e.g., from game_history.get_game_history(power_name)) and returns it. + + Previously, we assumed we had a structured list of messages, but in practice, + game_history is just a string, so we skip relationship-based grouping. + + In the future, if 'GameHistory' becomes more structured, we can parse it here. + """ + if not conversation_text.strip(): + return "(No game history yet)\n" + + # For now, we can simply return the conversation text + # or do minimal formatting as we see fit. 
+ output = "COMMUNICATION HISTORY:\n\n" + output += conversation_text.strip() + "\n" + return output + + +def format_possible_orders(game, possible_orders): + """ + Display orders with strategic context, maintaining the exact order syntax + while adding meaningful descriptions about their tactical purpose. + """ + # First pass - analyze game state for strategic context + supply_centers = set(game.map.scs) + power_centers = {} + contested_regions = set() + + # Gather supply center ownership + for power_name, centers in game.get_centers().items(): + for center in centers: + power_centers[center] = power_name + + # Identify contested regions (simplified approach) + # A more sophisticated implementation would analyze unit adjacencies + + # Classify orders by strategic purpose + strategic_orders = { + "OFFENSIVE": [], # Orders that can capture centers or threaten enemy units + "DEFENSIVE": [], # Orders that protect your centers or units + "TACTICAL": [], # Orders that improve position without immediate captures + "SUPPORT": [] # Support orders + } + + # Process each order + for loc, orders in possible_orders.items(): + for order in orders: + order_parts = order.split() + order_type = None + + # Determine order type + if " H" in order: + order_type = "DEFENSIVE" + elif " S " in order: + order_type = "SUPPORT" + elif " - " in order: + # Get destination + dest = order_parts[-1].split(" VIA")[0] if " VIA" in order else order_parts[-1] + + # Check if destination is a supply center + if dest[:3] in supply_centers: + # If center is neutral or enemy-owned, it's offensive + if dest[:3] not in power_centers or power_centers[dest[:3]] != game.role: + order_type = "OFFENSIVE" + else: + order_type = "DEFENSIVE" # Moving to own supply center + else: + order_type = "TACTICAL" # Non-center destination + elif " C " in order: + order_type = "SUPPORT" # Classify convoy as support + + # Generate strategic description + description = generate_order_description(game, order, order_type, 
power_centers, supply_centers) + + # Add to appropriate category + if order_type: + strategic_orders[order_type].append((order, description)) + + # Generate formatted output + output = "POSSIBLE ORDERS:\n\n" + + # Add offensive moves first - these are highest priority + if strategic_orders["OFFENSIVE"]: + output += "Offensive Moves (capture territory):\n" + for order, desc in strategic_orders["OFFENSIVE"]: + output += f" {order} {desc}\n" + output += "\n" + + # Add defensive moves + if strategic_orders["DEFENSIVE"]: + output += "Defensive Moves (protect territory):\n" + for order, desc in strategic_orders["DEFENSIVE"]: + output += f" {order} {desc}\n" + output += "\n" + + # Add tactical positioning moves + if strategic_orders["TACTICAL"]: + output += "Tactical Moves (improve position):\n" + for order, desc in strategic_orders["TACTICAL"]: + output += f" {order} {desc}\n" + output += "\n" + + # Add support moves + if strategic_orders["SUPPORT"]: + output += "Support Options (strengthen attacks/defense):\n" + for order, desc in strategic_orders["SUPPORT"]: + output += f" {order} {desc}\n" + + return output + + +def generate_order_description(game, order, order_type, power_centers, supply_centers): + """ + Generate a strategic description for an order based on its type and context. 
+ """ + order_parts = order.split() + + # Hold orders + if order_type == "DEFENSIVE" and " H" in order: + unit_loc = order_parts[1] + if unit_loc[:3] in supply_centers: + if unit_loc[:3] in power_centers and power_centers[unit_loc[:3]] == game.role: + return "(secure your supply center)" + else: + return "(maintain position at supply center)" + return "(maintain strategic position)" + + # Move orders + elif order_type in ["OFFENSIVE", "TACTICAL", "DEFENSIVE"] and " - " in order: + unit_type = order_parts[0] # A or F + unit_loc = order_parts[1] + dest = order_parts[3].split(" VIA")[0] if len(order_parts) > 3 and "VIA" in order_parts[-1] else order_parts[3] + + # Moving to a supply center + if dest[:3] in supply_centers: + if dest[:3] not in power_centers: + return f"(capture neutral supply center)" + else: + target_power = power_centers[dest[:3]] + return f"(attack {target_power}'s supply center)" + + # Moving to a non-supply center + if unit_type == "A": + # Army moves to tactical positions + return f"(strategic positioning)" + else: + # Fleet moves often about sea control + return f"(secure sea route)" + + # Support orders + elif order_type == "SUPPORT" and " S " in order: + # Find the unit being supported and its action + supported_part = " ".join(order_parts[3:]) + + if " - " in supported_part: + # Supporting a move + supported_unit = order_parts[3] + supported_dest = order_parts[-1] + + if supported_dest[:3] in supply_centers: + if supported_dest[:3] not in power_centers: + return f"(support capture of neutral center)" + else: + target_power = power_centers[supported_dest[:3]] + return f"(strengthen attack on {target_power})" + return "(strengthen attack)" + else: + # Supporting a hold + return "(reinforce defense)" + + # Convoy orders + elif " C " in order: + return "(enable army transport by sea)" + + # Default + return "" + + +def format_convoy_paths(game, convoy_paths_possible): + """ + Format convoy paths in a strategically meaningful way, + grouping by 
region and highlighting strategic objectives. + + Input format: + [('START', {required fleets}, {possible destinations}), ...] + + Example tuple: ('ALB', {'ION'}, {'GRE', 'APU', 'NAP', 'TUN'}) + """ + if not convoy_paths_possible: + return "CONVOY POSSIBILITIES: None currently available.\n" + + # Group convoy paths by general region + regional_paths = { + "MEDITERRANEAN": [], # Central/Southern paths + "NORTH SEA": [], # Northern European paths + "BLACK SEA": [], # Eastern paths + "COMPLEX": [] # Multi-fleet convoys + } + + # Supply centers for context + supply_centers = set(game.map.scs) + power_centers = {power_name: set(centers) for power_name, centers in game.get_centers().items()} + neutral_centers = supply_centers - set().union(*power_centers.values()) + + # Current power for context + current_power = game.role if hasattr(game, 'role') else None + + # Map locations to regions + mediterranean_waters = {"ION", "TYS", "WES", "ADR", "AEG", "EAS", "LYO"} + north_sea_waters = {"NTH", "NWG", "ENG", "IRI", "SKA", "HEL", "BAL", "BOT", "BAR"} + black_sea_waters = {"BLA"} + + # Process each convoy path + for path in convoy_paths_possible: + start_loc, required_fleets, destinations = path + + # Skip if no destinations + if not destinations: + continue + + # Determine region + region = "COMPLEX" # Default + if len(required_fleets) == 1: + fleet_loc = next(iter(required_fleets)) + if fleet_loc in mediterranean_waters: + region = "MEDITERRANEAN" + elif fleet_loc in north_sea_waters: + region = "NORTH SEA" + elif fleet_loc in black_sea_waters: + region = "BLACK SEA" + + # Add strategic context for each destination + for dest in destinations: + strategic_note = _get_convoy_destination_context( + game, start_loc, dest, supply_centers, power_centers, + neutral_centers, current_power + ) + + # Format info about required fleets + if len(required_fleets) == 1: + fleet_info = f"via {next(iter(required_fleets))}" + else: + fleet_info = f"via {' + '.join(required_fleets)}" + + # 
Create entry + entry = (start_loc, dest, fleet_info, strategic_note) + regional_paths[region].append(entry) + + # Format the output + output = "CONVOY POSSIBILITIES:\n\n" + + # Show each region + for region, paths in regional_paths.items(): + if not paths: + continue + + output += f"{region} CONVOYS:\n" + + # Group by start location + by_start = {} + for start, dest, fleet_info, note in paths: + by_start.setdefault(start, []).append((dest, fleet_info, note)) + + # Format each start location's options + for start, destinations in by_start.items(): + start_name = game.map.loc_name.get(start, start) + output += f" From {start_name} ({start}):\n" + + for dest, fleet_info, note in destinations: + dest_name = game.map.loc_name.get(dest, dest) + output += f" A {start} - {dest} {fleet_info} ({note})\n" + + output += "\n" + + return output + + +def _get_convoy_destination_context(game, start, dest, supply_centers, power_centers, neutral_centers, current_power): + """Generate strategic context for convoy destinations""" + start_base = start[:3] # Remove any coast specification + dest_base = dest[:3] # Remove any coast specification + + # Check if destination is a supply center + if dest_base in supply_centers: + if dest_base in neutral_centers: + return f"capture neutral SC {game.map.loc_name.get(dest_base, dest_base)}" + + for power, centers in power_centers.items(): + if dest_base in centers: + if power == current_power: + return f"reinforce your SC {game.map.loc_name.get(dest_base, dest_base)}" + else: + return f"attack {power}'s SC {game.map.loc_name.get(dest_base, dest_base)}" + + # Check for strategic positioning + # Major strategic locations that aren't supply centers + strategic_positions = { + "RUH": "central position threatening multiple German SCs", + "BUR": "central position threatening both France and Germany", + "UKR": "strategic buffer between Russia and Austria-Hungary", + "BOH": "central position for attacking Austria", + "TYR": "mountain pass to either 
Venice or Munich", + "PIE": "bridgehead into both France and Italy", + "SYR": "buffer protecting Turkey's eastern flank" + } + + if dest_base in strategic_positions: + return strategic_positions[dest_base] + + # By default, highlight the unconventional movement + src_type = game.map.area_type.get(start_base, "") + dest_type = game.map.area_type.get(dest_base, "") + + if src_type == "COAST" and dest_type == "COAST": + return f"bypass land barriers for surprise positioning" + + return f"strategic repositioning" + + +def generate_threat_assessment(game, board_state, power_name): + """ + High-level function that tries to identify immediate threats + from adjacent enemy units to your units or centers. + """ + our_units = set(loc.split(" ", 1)[1] for loc in board_state["units"].get(power_name, [])) + our_centers = set(board_state["centers"].get(power_name, [])) + + threats = [] + for enemy_power, enemy_units in board_state["units"].items(): + if enemy_power == power_name: + continue + for unit_code in enemy_units: + try: + # e.g. "A MUN" + parts = unit_code.split(" ", 1) + enemy_loc = parts[1].strip() + except IndexError: + continue + + # check adjacency to our units or centers + neighbors = game.map.loc_abut.get(enemy_loc, []) + threatened = [] + for nbr in neighbors: + if nbr in our_units: + threatened.append(f"our unit @ {nbr}") + elif nbr in our_centers: + threatened.append(f"our center @ {nbr}") + + if threatened: + threats.append((enemy_power, unit_code, threatened)) + + output = "THREAT ASSESSMENT:\n" + if not threats: + output += " No immediate threats detected.\n\n" + return output + + for (enemy_pwr, code, targets) in threats: + output += f" {enemy_pwr}'s {code} threatens {', '.join(targets)}\n" + output += "\n" + return output + + +def generate_sc_projection(game, board_state, power_name): + """ + Estimate potential gains from neutral or weakly held enemy SCs, plus + highlight which of your centers are at risk (no unit present). 
+ """ + our_units = set(loc.split(" ", 1)[1] for loc in board_state["units"].get(power_name, [])) + our_centers = set(board_state["centers"].get(power_name, [])) + all_centers_control = board_state["centers"] # dict of power -> list of centers + all_controlled = set() + for c_list in all_centers_control.values(): + all_controlled.update(c_list) + + # Potential neutral SC gains + neutral_gains = [] + for sc in game.map.scs: + if sc not in all_controlled: # neutral + # see if we have a unit adjacent + neighbors = game.map.loc_abut.get(sc, []) + if any(nbr in our_units for nbr in neighbors): + neutral_gains.append(sc) + + # Weakly held enemy SC + contestable = [] + for e_pwr, e_centers in board_state["centers"].items(): + if e_pwr == power_name: + continue + enemy_units = set(loc.split(" ", 1)[1] for loc in board_state["units"].get(e_pwr, [])) + for c in e_centers: + # if no enemy unit is physically there + if c not in enemy_units: + # see if we have a unit adjacent + neighbors = game.map.loc_abut.get(c, []) + if any(nbr in our_units for nbr in neighbors): + contestable.append((c, e_pwr)) + + # Our centers at risk (no unit present) + at_risk = [own_sc for own_sc in our_centers if own_sc not in our_units] + + # Format final + output = "SUPPLY CENTER PROJECTION:\n" + output += f" Current Count: {len(our_centers)}\n" + + if neutral_gains: + output += " Potential neutral gains:\n" + for sc in neutral_gains: + output += f" {format_location_with_expansion(game, sc)}\n" + + if contestable: + output += " Contestable enemy centers:\n" + for c, e_pwr in contestable: + output += f" {format_location_with_expansion(game, c)} (currently owned by {e_pwr})\n" + + if at_risk: + output += " Centers at risk (no defending unit):\n" + for sc in at_risk: + output += f" {format_location_with_expansion(game, sc)}\n" + + best_case = len(our_centers) + len(neutral_gains) + len(contestable) + worst_case = len(our_centers) - len(at_risk) + output += f" Next-phase range: {worst_case} to 
{best_case} centers\n\n" + return output diff --git a/diplomacy/engine/game.py b/diplomacy/engine/game.py index 91fc0e6..ac7d000 100644 --- a/diplomacy/engine/game.py +++ b/diplomacy/engine/game.py @@ -46,9 +46,9 @@ UNDETERMINED, POWER, UNIT, LOCATION, COAST, ORDER, MOVE_SEP, OTHER = 0, 1, 2, 3, LOGGER = logging.getLogger(__name__) # set logging level to INFO -logging.basicConfig(level=logging.INFO) +#logging.basicConfig(level=logging.INFO) # set logging level to DEBUG -#logging.basicConfig(level=logging.DEBUG) +logging.basicConfig(level=logging.DEBUG) class Game(Jsonable): """ Game class. @@ -4604,7 +4604,7 @@ class Game(Jsonable): if idx > 0: prev_phase_key = all_phases[idx - 1] logging.debug( - "DEBUG _generate_phase_summary: Using prev_phase_key=%s (idx-2). If skipping a sub-phase is undesired, consider (idx-1).", + "DEBUG _generate_phase_summary: Using prev_phase_key=%s (idx-1).", prev_phase_key ) try: diff --git a/lm_game.py b/lm_game.py index b63a48c..74383cf 100644 --- a/lm_game.py +++ b/lm_game.py @@ -37,7 +37,7 @@ def my_summary_callback(system_prompt, user_prompt, model_name): client = load_model_client(model_name, emptysystem=True) combined_prompt = f"{system_prompt}\n\n{user_prompt}" # Pseudo-code for generating a response: - return client.generate_response(combined_prompt) + return client.generate_response(combined_prompt, empty_system=True) def parse_arguments(): @@ -47,7 +47,7 @@ def parse_arguments(): parser.add_argument( "--max_year", type=int, - default=1925, + default=1910, help="Maximum year to simulate. The game will stop once this year is reached.", ) parser.add_argument( @@ -121,9 +121,7 @@ def main(): max_year = args.max_year summary_model = args.summary_model - logger.info( - "Starting a new Diplomacy game for testing with multiple LLMs, now concurrent!" 
- ) + logger.info("Starting a new Diplomacy game for testing with multiple LLMs, now concurrent!") start_whole = time.time() model_error_stats = defaultdict( @@ -143,6 +141,18 @@ def main(): result_folder = f"./results/{timestamp_str}" os.makedirs(result_folder, exist_ok=True) + # --------------------------- + # ADD FILE HANDLER FOR LOGS + # --------------------------- + log_file_path = os.path.join(result_folder, "game.log") + file_handler = logging.FileHandler(log_file_path) + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter( + logging.Formatter("%(asctime)s [%(levelname)s] %(name)s - %(message)s", datefmt="%H:%M:%S") + ) + logger.addHandler(file_handler) + logger.info(f"File handler added. Writing logs to {log_file_path}.") + # File paths manifesto_path = f"{result_folder}/game_manifesto.txt" # Use provided output filename or generate one based on the timestamp @@ -171,6 +181,18 @@ def main(): else: game.power_model_map = assign_models_to_powers(randomize=True) + logger.debug("Power model assignments:") + for power, model_id in game.power_model_map.items(): + logger.debug(f"{power} => type={type(model_id)}, value={model_id}") + + # Also, if you prefer to fix the negotiation function: + # We could do a one-liner ensuring all model_id are strings: + for p in game.power_model_map: + if not isinstance(game.power_model_map[p], str): + game.power_model_map[p] = str(game.power_model_map[p]) + + logger.info("Post-cleanup: Verified all power model IDs are strings.") + round_counter = 0 # Track number of rounds while not game.is_game_done: From b54a8252d6b4f693c827385dc97be85aa6e36488 Mon Sep 17 00:00:00 2001 From: AlxAI Date: Sun, 23 Feb 2025 18:18:47 -0800 Subject: [PATCH 6/6] fix convoy first attempt at summaries --- ai_diplomacy/clients.py | 57 ++-- ai_diplomacy/long_story_short.py | 313 ++++++++++++++++++ ai_diplomacy/model_loader.py | 32 ++ .../prompts/message_summary_prompt.txt | 25 ++ ai_diplomacy/prompts/phase_summary_prompt.txt | 25 ++ 
ai_diplomacy/utils.py | 196 ++++------- lm_game.py | 11 +- 7 files changed, 497 insertions(+), 162 deletions(-) create mode 100644 ai_diplomacy/long_story_short.py create mode 100644 ai_diplomacy/model_loader.py create mode 100644 ai_diplomacy/prompts/message_summary_prompt.txt create mode 100644 ai_diplomacy/prompts/phase_summary_prompt.txt diff --git a/ai_diplomacy/clients.py b/ai_diplomacy/clients.py index 24a031d..563c727 100644 --- a/ai_diplomacy/clients.py +++ b/ai_diplomacy/clients.py @@ -19,6 +19,8 @@ from google import genai from diplomacy.engine.message import GLOBAL from .game_history import GameHistory +from .long_story_short import get_optimized_context +from .model_loader import load_model_client # set logger back to just info logger = logging.getLogger("client") @@ -110,21 +112,33 @@ class BaseModelClient: neutral_supply_centers_summary = format_power_units_and_centers(game, 'NEUTRAL', board_state) # 6) Gather the conversation text + raw_conversation_text = "" if hasattr(game_history, "get_game_history"): - conversation_text = game_history.get_game_history(power_name) or "(No history yet)" + raw_conversation_text = game_history.get_game_history(power_name) or "(No history yet)" else: # Might be a plain string - conversation_text = game_history if isinstance(game_history, str) else "(No history yet)" + raw_conversation_text = game_history if isinstance(game_history, str) else "(No history yet)" - history_text = organize_history_by_relationship(conversation_text) + # Organize history by relationship + organized_history = organize_history_by_relationship(raw_conversation_text) + + # Get optimized context (summaries if needed) + optimized_phases, optimized_messages = get_optimized_context( + game, + game_history, + power_name, + organized_history + ) + + # Use the optimized message history + history_text = optimized_messages # 7) Format possible orders possible_orders_text = format_possible_orders(game, possible_orders) # 8) Convoy Paths 
logger.debug(f"convoy_paths_possible is: {game.convoy_paths_possible}") - #convoy_paths_text = format_convoy_paths(game, game.convoy_paths_possible) - convoy_paths_text = "" + convoy_paths_text = format_convoy_paths(game, game.convoy_paths_possible, power_name) # 9) Threat Assessment threat_text = generate_threat_assessment(game, board_state, power_name) @@ -133,11 +147,15 @@ class BaseModelClient: sc_projection_text = generate_sc_projection(game, board_state, power_name) # 11) Past Phase Summaries - if phase_summaries: + if optimized_phases: # Combine each phase summary for reference lines = [] - for ph, summ in phase_summaries.items(): - lines.append(f"PHASE {ph}:\n{summ}\n") + for ph, summ in optimized_phases.items(): + # Check if this is a summary entry + if ph.startswith("SUMMARY_UNTIL_"): + lines.append(f"HISTORICAL SUMMARY (until {ph[13:]}):\n{summ}\n") + else: + lines.append(f"PHASE {ph}:\n{summ}\n") historical_summaries = "\n".join(lines) else: historical_summaries = "(No historical summaries yet)" @@ -683,29 +701,6 @@ class DeepSeekClient(BaseModelClient): return "" -############################################################################## -# 3) Factory to Load Model Client -############################################################################## - - -def load_model_client(model_id: str, power_name: Optional[str] = None, emptysystem: bool = False) -> BaseModelClient: - """ - Returns the appropriate LLM client for a given model_id string, optionally keyed by power_name. 
- Example usage: - client = load_model_client("claude-3-5-sonnet-20241022", power_name="FRANCE", emptysystem=True) - """ - lower_id = model_id.lower() - if "claude" in lower_id: - return ClaudeClient(model_id, power_name, emptysystem=emptysystem) - elif "gemini" in lower_id: - return GeminiClient(model_id, power_name, emptysystem=emptysystem) - elif "deepseek" in lower_id: - return DeepSeekClient(model_id, power_name, emptysystem=emptysystem) - else: - # Default to OpenAI - return OpenAIClient(model_id, power_name) - - ############################################################################## # 4) Example Usage in a Diplomacy "main" or Similar ############################################################################## diff --git a/ai_diplomacy/long_story_short.py b/ai_diplomacy/long_story_short.py new file mode 100644 index 0000000..26f448c --- /dev/null +++ b/ai_diplomacy/long_story_short.py @@ -0,0 +1,313 @@ +import logging +import re +import os +import time +from typing import Dict, List, Optional, Tuple, Any + +# Establish logger +logger = logging.getLogger(__name__) + +# Import model client for summarization +from ai_diplomacy.model_loader import load_model_client + +# Token counting approximation +def count_tokens(text: str) -> int: + """ + Approximates token count for text. This is a rough estimate. + OpenAI tokens are ~4 chars per token on average. + """ + return len(text) // 4 # Simple approximation + + +class ContextManager: + """ + Manages context size for Diplomacy game history and messages. + Provides summarization functionality when context exceeds thresholds. 
+ """ + def __init__( + self, + phase_token_threshold: int = 5000, + message_token_threshold: int = 5000, + summary_model: str = "o3-mini" + ): + self.phase_token_threshold = phase_token_threshold + self.message_token_threshold = message_token_threshold + self.summary_model = summary_model + + # Cache for summaries - prevents regenerating summaries unnecessarily + self.phase_summary_cache = {} + self.message_summary_cache = {} + + # Track when we last generated summaries + self.last_phase_summary_time = 0 + self.last_message_summary_time = 0 + + # Cooldown period (seconds) - don't summarize more frequently than this + self.summary_cooldown = 300 # 5 minutes + + def load_summarization_prompts(self) -> Tuple[str, str]: + """ + Load prompts for phase and message summarization. + Returns tuple of (phase_prompt, message_prompt) + """ + try: + # Try to load from files + with open("./ai_diplomacy/prompts/phase_summary_prompt.txt", "r") as f: + phase_prompt = f.read().strip() + + with open("./ai_diplomacy/prompts/message_summary_prompt.txt", "r") as f: + message_prompt = f.read().strip() + + return phase_prompt, message_prompt + except FileNotFoundError: + # Return default prompts if files not found + logger.warning("Summarization prompt files not found. Using defaults.") + + phase_prompt = """You are summarizing the history of a Diplomacy game. +Create a concise summary that preserves all strategically relevant information about: +1. Supply center changes +2. Unit movements and their results +3. Key battles and their outcomes +4. Territory control shifts + +Focus on what actually happened, not explanations or justifications. +Maintain the chronological structure but condense verbose descriptions. +Use clear, factual language with specific location names. + +ORIGINAL PHASE HISTORY: +{phase_history} + +SUMMARY:""" + + message_prompt = """You are summarizing diplomatic messages in a Diplomacy game. 
+Create a concise summary of the conversations between powers that preserves: +1. Agreements and alliances formed +2. Betrayals and broken promises +3. Strategic intentions revealed +4. Explicit threats or support offered +5. Key relationships between each power + +Organize by relationships (e.g., FRANCE-GERMANY, ENGLAND-RUSSIA), prioritizing the most +significant interactions. Include specific territory names mentioned. + +The summary must reflect the actual diplomatic landscape accurately so players can make informed decisions. + +ORIGINAL MESSAGE HISTORY: +{message_history} + +SUMMARY:""" + + return phase_prompt, message_prompt + + def should_summarize_phases(self, phase_summaries: Dict[str, str]) -> bool: + """ + Determine if phase summaries need to be condensed based on token count + and cooldown period. + """ + # Check if we're in cooldown period + current_time = time.time() + if current_time - self.last_phase_summary_time < self.summary_cooldown: + return False + + # Join all summaries to count total tokens + all_text = "\n\n".join(phase_summaries.values()) + token_count = count_tokens(all_text) + + return token_count > self.phase_token_threshold + + def should_summarize_messages(self, message_history: str) -> bool: + """ + Determine if message history needs to be condensed based on token count + and cooldown period. + """ + # Check if we're in cooldown period + current_time = time.time() + if current_time - self.last_message_summary_time < self.summary_cooldown: + return False + + token_count = count_tokens(message_history) + return token_count > self.message_token_threshold + + def summarize_phase_history(self, phase_summaries: Dict[str, str], power_name: Optional[str] = None) -> Dict[str, str]: + """ + Create a condensed version of phase summaries. + Keeps the most recent phases intact and summarizes older ones. + + Returns a new dictionary with condensed history. 
+ """ + if not self.should_summarize_phases(phase_summaries): + return phase_summaries + + # Mark summarization time + self.last_phase_summary_time = time.time() + + # Sort phases chronologically + sorted_phases = sorted(phase_summaries.keys()) + + # Keep the 3 most recent phases intact + recent_phases = sorted_phases[-3:] if len(sorted_phases) > 3 else sorted_phases + older_phases = sorted_phases[:-3] if len(sorted_phases) > 3 else [] + + if not older_phases: + return phase_summaries # Nothing to summarize + + # Get summarization prompt + phase_prompt, _ = self.load_summarization_prompts() + + # Generate a summary of the older phases + older_text = "" + for phase in older_phases: + older_text += f"PHASE {phase}:\n{phase_summaries[phase]}\n\n" + + # Check if we already have a cached summary for this exact text + if older_text in self.phase_summary_cache: + summary = self.phase_summary_cache[older_text] + else: + # Generate new summary + summarization_client = load_model_client(self.summary_model, power_name=power_name, emptysystem=True) + formatted_prompt = phase_prompt.replace("{phase_history}", older_text) + summary = summarization_client.generate_response(formatted_prompt) + + # Cache the result + self.phase_summary_cache[older_text] = summary + + # Create new dictionary with summarized older phases and intact recent phases + result = {} + + # Add the summary as a special entry + summary_key = f"SUMMARY_UNTIL_{older_phases[-1]}" + result[summary_key] = summary + + # Add the recent phases as-is + for phase in recent_phases: + result[phase] = phase_summaries[phase] + + return result + + def summarize_message_history( + self, + message_history: str, + power_name: Optional[str] = None, + organized_by_relationship: bool = True + ) -> str: + """ + Create a condensed version of message history. + If organized_by_relationship is True, assumes the history is already + organized by power relationships. + + Returns a condensed message history. 
+ """ + if not self.should_summarize_messages(message_history): + return message_history + + # Mark summarization time + self.last_message_summary_time = time.time() + + # Get summarization prompt + _, message_prompt = self.load_summarization_prompts() + + # Check if we already have a cached summary for this exact text + if message_history in self.message_summary_cache: + return self.message_summary_cache[message_history] + + # Generate new summary + summarization_client = load_model_client(self.summary_model, power_name=power_name, emptysystem=True) + formatted_prompt = message_prompt.replace("{message_history}", message_history) + summary = summarization_client.generate_response(formatted_prompt) + + # Cache the result + self.message_summary_cache[message_history] = summary + + return summary + + def get_optimized_phase_summaries( + self, + game, + power_name: Optional[str] = None + ) -> Dict[str, str]: + """ + Main access point for getting optimized phase summaries. + If summaries are below threshold, returns original. + Otherwise, returns condensed version. + """ + if not hasattr(game, "phase_summaries") or not game.phase_summaries: + return {} + + if self.should_summarize_phases(game.phase_summaries): + # Create condensed version + return self.summarize_phase_history(game.phase_summaries, power_name) + else: + # Return original + return game.phase_summaries + + def get_optimized_message_history( + self, + game_history, + power_name: Optional[str] = None, + organized_history: Optional[str] = None + ) -> str: + """ + Main access point for getting optimized message history. 
# Module-wide ContextManager instance, created with default settings.
# configure_context_manager() swaps it for a customised one at startup.
context_manager = ContextManager()


def configure_context_manager(
    phase_threshold: int = 5000,
    message_threshold: int = 5000,
    summary_model: str = "o3-mini"
) -> None:
    """
    Replace the module-level ``context_manager`` with a freshly built one.

    Intended to be called early in the application lifecycle. The name is
    rebound rather than mutated, so code that imported ``context_manager``
    by name before this call keeps the previous instance.

    Args:
        phase_threshold: Passed to ``ContextManager`` as ``phase_token_threshold``.
        message_threshold: Passed to ``ContextManager`` as ``message_token_threshold``.
        summary_model: Passed to ``ContextManager`` as ``summary_model``.
    """
    global context_manager
    settings = {
        "phase_token_threshold": phase_threshold,
        "message_token_threshold": message_threshold,
        "summary_model": summary_model,
    }
    context_manager = ContextManager(**settings)
def load_model_client(model_id: str, power_name: Optional[str] = None, emptysystem: bool = False) -> 'BaseModelClient':
    """
    Build the LLM client that matches ``model_id``.

    The provider is chosen by case-insensitive substring match on
    ``model_id``, checked in this order: "claude", "gemini", "deepseek".
    Anything else falls back to the OpenAI client.

    Args:
        model_id: Model identifier, e.g. "claude-3-5-sonnet-20241022".
        power_name: Optional power the client is created for.
        emptysystem: Forwarded to the Claude, Gemini and DeepSeek clients.

    Returns:
        A client instance for the matched provider.

    Example usage:
        client = load_model_client("claude-3-5-sonnet-20241022", power_name="FRANCE", emptysystem=True)
    """
    # Imported here, not at module level, to avoid a circular import.
    from .clients import ClaudeClient, GeminiClient, DeepSeekClient, OpenAIClient

    normalized_id = model_id.lower()
    # Order matters: the first keyword found in the id wins.
    keyword_clients = (
        ("claude", ClaudeClient),
        ("gemini", GeminiClient),
        ("deepseek", DeepSeekClient),
    )
    for keyword, client_cls in keyword_clients:
        if keyword in normalized_id:
            return client_cls(model_id, power_name, emptysystem=emptysystem)

    # Default to OpenAI.
    # NOTE(review): emptysystem is not forwarded on this branch, unlike the
    # three above - confirm whether OpenAIClient accepts it before changing.
    return OpenAIClient(model_id, power_name)
+ +In your summary, maintain all of the following critical diplomatic information: +- Specific agreements about attacking or supporting certain territories +- Promises of non-aggression and their scope/duration +- Discussions about supply center control and transfers +- Stated preferences about other powers (who they want to attack/support) +- Explicit lies or deceptions that were revealed +- Coordination of moves between powers + +The summary must reflect the actual diplomatic landscape accurately so players can make informed decisions and remember past interactions that might influence current negotiations. + +ORIGINAL MESSAGE HISTORY: +{message_history} + +SUMMARY: \ No newline at end of file diff --git a/ai_diplomacy/prompts/phase_summary_prompt.txt b/ai_diplomacy/prompts/phase_summary_prompt.txt new file mode 100644 index 0000000..7a7775e --- /dev/null +++ b/ai_diplomacy/prompts/phase_summary_prompt.txt @@ -0,0 +1,25 @@ +You are summarizing the history of a Diplomacy game. +Create a concise summary that preserves all strategically relevant information about: +1. Supply center changes +2. Unit movements and their results +3. Key battles and their outcomes +4. Territory control shifts + +Focus on what actually happened, not explanations or justifications. +Maintain the chronological structure but condense verbose descriptions. +Use clear, factual language with specific location names. +Ensure your summary maintains important tactical and strategic information that would be necessary for a player to make informed decisions. 
def format_convoy_paths(game, convoy_paths_possible, power_name):
    """
    Describe the possible convoy routes from ``power_name``'s point of view.

    Each route is filed under exactly one heading:

    * "YOUR ARMY CONVOYS"  - one of the power's armies stands on the start.
    * "YOUR FLEET CONVOYS" - otherwise, at least one required fleet location
      holds one of the power's fleets.
    * "ENEMY CONVOYS"      - everything else.

    Args:
        game: Object providing ``get_units(power_name)`` and ``map.scs``.
        convoy_paths_possible: Iterable of
            ``(start_loc, required_fleets, possible_destinations)`` tuples,
            e.g. ``('ALB', {'ION'}, {'GRE', 'APU'})``. May be empty or None.
        power_name: Power whose units decide the "YOUR ..." classification.

    Returns:
        A text block starting with "CONVOY POSSIBILITIES:". Only non-empty
        headings are printed, each followed by its sorted routes and one
        blank line. A single "none available" line is returned when there
        are no paths at all.
    """
    if not convoy_paths_possible:
        return "CONVOY POSSIBILITIES: None currently available.\n"

    # Keep armies and fleets apart so a fleet sitting on the start province
    # is not mistaken for a convoyable army.
    army_locs = set()
    fleet_locs = set()
    for unit in game.get_units(power_name):
        # Units are expected as "<type> <location>", e.g. "A PAR" or
        # "F SPA/SC". NOTE(review): dislodged units appear to be reported
        # with a leading "*" - confirm against the engine.
        unit_type, _, location = unit.lstrip("*").partition(" ")
        owner_set = army_locs if unit_type == "A" else fleet_locs
        owner_set.add(location[:3])  # drop any coast suffix

    # Readable names for a few major sea regions; other codes are shown as-is.
    sea_regions = {
        'NTH': "North Sea",
        'MAO': "Mid-Atlantic",
        'TYS': "Tyrrhenian Sea",
        'BLA': "Black Sea",
        'SKA': "Skagerrak",
    }

    convoys = {
        "YOUR ARMY CONVOYS": [],
        "YOUR FLEET CONVOYS": [],
        "ENEMY CONVOYS": [],
    }
    supply_centers = set(game.map.scs)

    for start, fleets, destinations in convoy_paths_possible:
        # A route without fleets or without destinations is not usable.
        if not destinations or not fleets:
            continue

        # Sort the fleet locations: they usually arrive as a set, whose
        # iteration order would make the text differ between runs.
        ordered_fleets = sorted(fleets)
        fleet_path = " + ".join(sea_regions.get(loc, loc) for loc in ordered_fleets)

        if start in army_locs:
            category, suffix = "YOUR ARMY CONVOYS", ""
        elif any(loc in fleet_locs for loc in ordered_fleets):
            category, suffix = "YOUR FLEET CONVOYS", " (you provide the convoy)"
        else:
            category, suffix = "ENEMY CONVOYS", " (possible enemy convoy)"

        for dest in destinations:
            sc_note = " (SC)" if dest in supply_centers else ""
            convoys[category].append(
                f"A {start} -> {dest}{sc_note} via {fleet_path}{suffix}"
            )

    parts = ["CONVOY POSSIBILITIES:\n\n"]
    for category, convoy_list in convoys.items():
        if not convoy_list:
            continue  # no heading and no blank line for empty categories
        parts.append(f"{category}:\n")
        parts.extend(f" {convoy}\n" for convoy in sorted(convoy_list))
        parts.append("\n")
    return "".join(parts)
- } - - if dest_base in strategic_positions: - return strategic_positions[dest_base] - - # By default, highlight the unconventional movement - src_type = game.map.area_type.get(start_base, "") - dest_type = game.map.area_type.get(dest_base, "") - - if src_type == "COAST" and dest_type == "COAST": - return f"bypass land barriers for surprise positioning" - - return f"strategic repositioning" - - def generate_threat_assessment(game, board_state, power_name): """ High-level function that tries to identify immediate threats diff --git a/lm_game.py b/lm_game.py index 74383cf..efc9a9f 100644 --- a/lm_game.py +++ b/lm_game.py @@ -13,7 +13,7 @@ os.environ["GRPC_PYTHON_LOG_LEVEL"] = "40" # ERROR level only from diplomacy import Game from diplomacy.utils.export import to_saved_game_format -from ai_diplomacy.clients import load_model_client +from ai_diplomacy.model_loader import load_model_client from ai_diplomacy.utils import ( get_valid_orders, gather_possible_orders, @@ -21,6 +21,7 @@ from ai_diplomacy.utils import ( ) from ai_diplomacy.negotiations import conduct_negotiations from ai_diplomacy.game_history import GameHistory +from ai_diplomacy.long_story_short import configure_context_manager dotenv.load_dotenv() @@ -78,7 +79,7 @@ def parse_arguments(): ), ) return parser.parse_args() - + def save_game_state(game, result_folder, game_file_path, model_error_stats, args, is_final=False): """ @@ -118,6 +119,12 @@ def save_game_state(game, result_folder, game_file_path, model_error_stats, args def main(): args = parse_arguments() + # Configure the context manager with the same summary model + configure_context_manager( + phase_threshold=10000, + message_threshold=10000, + summary_model=args.summary_model + ) max_year = args.max_year summary_model = args.summary_model