From b4a56126ec9cdcf7780d1c68b5e532bc7a432283 Mon Sep 17 00:00:00 2001 From: sam-paech <152407511+sam-paech@users.noreply.github.com> Date: Sat, 12 Jul 2025 10:17:17 +1000 Subject: [PATCH] state update fixes & streamline prompts --- ai_diplomacy/agent.py | 322 +++++++++++------- ai_diplomacy/clients.py | 2 +- ai_diplomacy/diary_logic.py | 97 ++++-- ai_diplomacy/game_history.py | 145 +++++++- ai_diplomacy/game_logic.py | 11 +- ai_diplomacy/initialization.py | 4 +- ai_diplomacy/negotiations.py | 110 +++--- ai_diplomacy/prompt_constructor.py | 10 +- .../prompts/phase_result_diary_prompt.txt | 3 - .../prompts_simple/context_prompt.txt | 11 +- .../diary_consolidation_prompt.txt | 15 +- .../negotiation_diary_prompt.txt | 42 ++- .../phase_result_diary_prompt.txt | 23 +- analysis/statistical_game_analysis.py | 132 ++++++- .../analysis/statistical_game_analysis.py | 15 +- experiment_runner/analysis/summary.py | 1 + lm_game.py | 48 +-- 17 files changed, 710 insertions(+), 281 deletions(-) diff --git a/ai_diplomacy/agent.py b/ai_diplomacy/agent.py index a2e64ae..094af3f 100644 --- a/ai_diplomacy/agent.py +++ b/ai_diplomacy/agent.py @@ -97,6 +97,26 @@ class DiplomacyAgent: logger.info(f"Initialized DiplomacyAgent for {self.power_name} with goals: {self.goals}") self.add_journal_entry(f"Agent initialized. Initial Goals: {self.goals}") + def _format_board_state(self, board_state_dict): + units = board_state_dict.get('units', {}) + centers = board_state_dict.get('centers', {}) + + eliminated = {power for power, scs in centers.items() if not scs} + + parts = ["Units:"] + for power, unit_list in sorted(units.items()): + label = f"{power} (Eliminated)" if power in eliminated else power + parts.append(f" {label}: {', '.join(unit_list)}") + + parts.append("Centers:") + for power, center_list in sorted(centers.items()): + label = f"{power} (Eliminated)" if power in eliminated else power + parts.append(f" {label}: {', '.join(center_list)}") + + return "\n".join(parts) + + + def _extract_json_from_text(self, text: str) -> dict: """Extract and parse JSON from text, handling common LLM response formats.""" if not text or not text.strip(): @@ -368,6 +388,46 @@ class DiplomacyAgent: f"[{self.power_name}] DIARY ENTRY ADDED for {phase}. Total full entries: {len(self.full_private_diary)}. New entry: {entry[:100]}..." ) + def get_latest_phase_diary_entries( + self, + *, + use_private_diary: bool = False, + separator: str = "\n\n", + ) -> str: + """ + Return all diary entries for the most-recent phase. + + Args: + use_private_diary: If True look at self.private_diary, otherwise + self.full_private_diary (default). + separator: String to place between entries in the final output. + + Returns: + A single formatted string containing every entry from the + latest phase, or an empty string if no diary content exists. + """ + diary: List[str] = self.private_diary if use_private_diary else self.full_private_diary + if not diary: + return "" + + # Expect entries like "[S1901M] text…" + phase_match = re.match(r"\[([^\]]+)\]", diary[-1]) + if not phase_match: + # Last line didn’t start with a phase tag; just return it. + return diary[-1] + + latest_phase = phase_match.group(1) + recent_entries: List[str] = [] + + for entry in reversed(diary): + if entry.startswith(f"[{latest_phase}]"): + recent_entries.append(entry) + else: + break + + recent_entries.reverse() # restore chronological order + return separator.join(recent_entries) + def format_private_diary_for_prompt(self) -> str: """ Formats the context diary for inclusion in a prompt. @@ -437,12 +497,12 @@ class DiplomacyAgent: # Prepare context for the prompt board_state_dict = game.get_state() - board_state_str = f"Units: {board_state_dict.get('units', {})}, Centers: {board_state_dict.get('centers', {})}" + board_state_str = self._format_board_state(board_state_dict) messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase) if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"): messages_this_round = ( - "(No messages involving your power this round that require deep reflection for diary. Focus on overall situation.)" + "(No messages involving your power this round.)" ) current_relationships_str = json.dumps(self.relationships) @@ -463,31 +523,34 @@ class DiplomacyAgent: # Do aggressive preprocessing of the template to fix the problematic patterns # This includes removing any newlines or whitespace before JSON keys that cause issues - for pattern in ["negotiation_summary", "updated_relationships", "relationship_updates", "intent"]: - # Fix the "\n "key"" pattern that breaks .format() - prompt_template_content = re.sub(rf'\n\s*"{pattern}"', f'"{pattern}"', prompt_template_content) + if False: + for pattern in ["negotiation_summary", "updated_relationships", "relationship_updates", "intent"]: + # Fix the "\n "key"" pattern that breaks .format() + prompt_template_content = re.sub(rf'\n\s*"{pattern}"', f'"{pattern}"', prompt_template_content) - # Escape all curly braces in JSON examples to prevent format() from interpreting them - # First, temporarily replace the actual template variables - temp_vars = [ - "power_name", - "current_phase", - "messages_this_round", - "agent_goals", - "agent_relationships", - "board_state_str", - "ignored_messages_context", - ] - for var in temp_vars: - prompt_template_content = prompt_template_content.replace(f"{{{var}}}", f"<<{var}>>") + # Escape all curly braces in JSON examples to prevent format() from interpreting them + # First, temporarily replace the actual template variables + + temp_vars = [ + "power_name", + "current_phase", + "messages_this_round", + "agent_goals", + "agent_relationships", + "board_state_str", + "ignored_messages_context", + "private_diary_summary", + ] + for var in temp_vars: + prompt_template_content = prompt_template_content.replace(f"{{{var}}}", f"<<{var}>>") - # Now escape all remaining braces (which should be JSON) - prompt_template_content = prompt_template_content.replace("{", "{{") - prompt_template_content = prompt_template_content.replace("}", "}}") + # Now escape all remaining braces (which should be JSON) + prompt_template_content = prompt_template_content.replace("{", "{{") + prompt_template_content = prompt_template_content.replace("}", "}}") - # Restore the template variables - for var in temp_vars: - prompt_template_content = prompt_template_content.replace(f"<<{var}>>", f"{{{var}}}") + # Restore the template variables + for var in temp_vars: + prompt_template_content = prompt_template_content.replace(f"<<{var}>>", f"{{{var}}}") # Create a dictionary with safe values for formatting format_vars = { @@ -515,8 +578,6 @@ class DiplomacyAgent: logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...") - logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...") - raw_response = await run_llm_and_log( client=self.client, prompt=full_prompt, @@ -567,7 +628,6 @@ class DiplomacyAgent: diary_text_candidate = parsed_data["intent"] else: diary_text_candidate += "\nIntent: " + parsed_data["intent"] - if diary_text_candidate: diary_entry_text = diary_text_candidate else: @@ -610,6 +670,10 @@ class DiplomacyAgent: elif new_relationships is not None: # It was provided but not a dict logger.warning(f"[{self.power_name}] 'updated_relationships' from diary LLM was not a dictionary: {type(new_relationships)}") + # update goals + if "goals" in parsed_data: + self.update_goals(parsed_data["goals"]) + # Add the generated (or fallback) diary entry self.add_diary_entry(diary_entry_text, game.current_short_phase) if relationships_updated: @@ -627,16 +691,19 @@ class DiplomacyAgent: self.add_diary_entry(f"(Error generating diary entry: {type(e).__name__})", game.current_short_phase) finally: if log_file_path: # Ensure log_file_path is provided - log_llm_response( - log_file_path=log_file_path, - model_name=self.client.model_name if self.client else "UnknownModel", - power_name=self.power_name, - phase=game.current_short_phase if game else "UnknownPhase", - response_type="negotiation_diary", # Specific type for CSV logging - raw_input_prompt=full_prompt, - raw_response=raw_response, - success=success_status, - ) + try: + log_llm_response( + log_file_path=log_file_path, + model_name=self.client.model_name if self.client else "UnknownModel", + power_name=self.power_name, + phase=game.current_short_phase if game else "UnknownPhase", + response_type="negotiation_diary", # Specific type for CSV logging + raw_input_prompt=full_prompt, + raw_response=raw_response, + success=success_status, + ) + except Exception as e: + print(e) async def generate_order_diary_entry(self, game: "Game", orders: List[str], log_file_path: str): """ @@ -783,105 +850,108 @@ class DiplomacyAgent: # Rest of the code remains the same async def generate_phase_result_diary_entry( - self, game: "Game", game_history: "GameHistory", phase_summary: str, all_orders: Dict[str, List[str]], log_file_path: str + self, game: "Game", game_history: "GameHistory", phase_summary: str, all_orders: Dict[str, List[str]], log_file_path: str, phase_name: str ): - """ - Generates a diary entry analyzing the actual phase results, - comparing them to negotiations and identifying betrayals/collaborations. - """ - logger.info(f"[{self.power_name}] Generating phase result diary entry for {game.current_short_phase}...") - - # Load the template - prompt_template = load_prompt("phase_result_diary_prompt.txt", prompts_dir=self.prompts_dir) - if not prompt_template: - logger.error(f"[{self.power_name}] Could not load phase_result_diary_prompt.txt. Skipping diary entry.") - return - - # Format all orders for the prompt - all_orders_formatted = "" - for power, orders in all_orders.items(): - orders_str = ", ".join(orders) if orders else "No orders" - all_orders_formatted += f"{power}: {orders_str}\n" - - # Get your own orders - your_orders = all_orders.get(self.power_name, []) - your_orders_str = ", ".join(your_orders) if your_orders else "No orders" - - # Get recent negotiations for this phase - messages_this_phase = game_history.get_messages_by_phase(game.current_short_phase) - your_negotiations = "" - for msg in messages_this_phase: - if msg.sender == self.power_name: - your_negotiations += f"To {msg.recipient}: {msg.content}\n" - elif msg.recipient == self.power_name: - your_negotiations += f"From {msg.sender}: {msg.content}\n" - - if not your_negotiations: - your_negotiations = "No negotiations this phase" - - # Format relationships - relationships_str = "\n".join([f"{p}: {r}" for p, r in self.relationships.items()]) - - # Format goals - goals_str = "\n".join([f"- {g}" for g in self.goals]) if self.goals else "None" - - # Create the prompt - prompt = prompt_template.format( - power_name=self.power_name, - current_phase=game.current_short_phase, - phase_summary=phase_summary, - all_orders_formatted=all_orders_formatted, - your_negotiations=your_negotiations, - pre_phase_relationships=relationships_str, - agent_goals=goals_str, - your_actual_orders=your_orders_str, - ) - - logger.debug(f"[{self.power_name}] Phase result diary prompt:\n{prompt[:500]}...") - - raw_response = "" - success_status = "FALSE" - try: - raw_response = await run_llm_and_log( - client=self.client, - prompt=prompt, + """ + Generates a diary entry analyzing the actual phase results, + comparing them to negotiations and identifying betrayals/collaborations. + """ + logger.info(f"[{self.power_name}] Generating phase result diary entry for {game.current_short_phase}...") + + # Load the template + prompt_template = load_prompt("phase_result_diary_prompt.txt", prompts_dir=self.prompts_dir) + if not prompt_template: + logger.error(f"[{self.power_name}] Could not load phase_result_diary_prompt.txt. Skipping diary entry.") + return + + # Format all orders for the prompt + all_orders_formatted = game_history.get_order_history_for_prompt( + game=game, # Pass the game object for normalization power_name=self.power_name, - phase=game.current_short_phase, - response_type="phase_result_diary", + current_phase_name=game.current_short_phase, + num_movement_phases_to_show=1, ) - if raw_response and raw_response.strip(): - # The response should be plain text diary entry - diary_entry = raw_response.strip() - self.add_diary_entry(diary_entry, game.current_short_phase) - success_status = "TRUE" - logger.info(f"[{self.power_name}] Phase result diary entry generated and added.") - else: - fallback_diary = ( - f"Phase {game.current_short_phase} completed. Orders executed as: {your_orders_str}. (Failed to generate detailed analysis)" + formatted_diary = self.format_private_diary_for_prompt() + + board_state_dict = game.get_state() + board_state_str = self._format_board_state(board_state_dict) + + # Get recent negotiations for this phase + messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase) + if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"): + messages_this_round = ( + "(No messages involving your power this round.)" ) - self.add_diary_entry(fallback_diary, game.current_short_phase) - logger.warning(f"[{self.power_name}] Empty response from LLM. Added fallback phase result diary.") - success_status = "FALSE" - except Exception as e: - logger.error(f"[{self.power_name}] Error generating phase result diary: {e}", exc_info=True) - fallback_diary = f"Phase {game.current_short_phase} completed. Unable to analyze results due to error." - self.add_diary_entry(fallback_diary, game.current_short_phase) - success_status = f"FALSE: {type(e).__name__}" - finally: - log_llm_response( - log_file_path=log_file_path, - model_name=self.client.model_name, + # Format relationships + relationships_str = "\n".join([f"{p}: {r}" for p, r in self.relationships.items()]) + + # Format goals + goals_str = "\n".join([f"- {g}" for g in self.goals]) if self.goals else "None" + + # Create the prompt + prompt = prompt_template.format( power_name=self.power_name, - phase=game.current_short_phase, - response_type="phase_result_diary", - raw_input_prompt=prompt, - raw_response=raw_response, - success=success_status, + current_phase=phase_name, + phase_summary=phase_summary, + all_orders_formatted=all_orders_formatted, + your_negotiations=messages_this_round, + pre_phase_relationships=relationships_str, + agent_goals=goals_str, + formatted_diary=formatted_diary, + board_state=board_state_str, ) + logger.debug(f"[{self.power_name}] Phase result diary prompt:\n{prompt[:500]}...") + + raw_response = "" + success_status = "FALSE" + + try: + raw_response = await run_llm_and_log( + client=self.client, + prompt=prompt, + power_name=self.power_name, + phase=phase_name, + response_type="phase_result_diary", + ) + + if raw_response and raw_response.strip(): + # The response should be plain text diary entry + diary_entry = raw_response.strip() + self.add_diary_entry(diary_entry, phase_name) + success_status = "TRUE" + logger.info(f"[{self.power_name}] Phase result diary entry generated and added.") + else: + fallback_diary = ( + f"Phase {phase_name} completed." + ) + self.add_diary_entry(fallback_diary, phase_name) + logger.warning(f"[{self.power_name}] Empty response from LLM. Added fallback phase result diary.") + success_status = "FALSE" + + except Exception as e: + logger.error(f"[{self.power_name}] Error generating phase result diary: {e}", exc_info=True) + fallback_diary = f"Phase {phase_name} completed. Unable to analyze results due to error." + self.add_diary_entry(fallback_diary, phase_name) + success_status = f"FALSE: {type(e).__name__}" + finally: + log_llm_response( + log_file_path=log_file_path, + model_name=self.client.model_name, + power_name=self.power_name, + phase=phase_name, + response_type="phase_result_diary", + raw_input_prompt=prompt, + raw_response=raw_response, + success=success_status, + ) + except Exception as e: + logger.error(e) + logger.error('!generate_phase_result_diary_entry failed') + def log_state(self, prefix=""): logger.debug(f"[{self.power_name}] {prefix} State: Goals={self.goals}, Relationships={self.relationships}") diff --git a/ai_diplomacy/clients.py b/ai_diplomacy/clients.py index 1687165..974e3ba 100644 --- a/ai_diplomacy/clients.py +++ b/ai_diplomacy/clients.py @@ -1039,7 +1039,7 @@ class OpenRouterClient(BaseModelClient): logger.debug(f"[{self.model_name}] Initialized OpenRouter client") - async def generate_response(self, prompt: str, temperature: float = 0.5, inject_random_seed: bool = True) -> str: + async def generate_response(self, prompt: str, temperature: float = 0.0, inject_random_seed: bool = True) -> str: """Generate a response using OpenRouter with robust error handling.""" try: # Append the call to action to the user's prompt diff --git a/ai_diplomacy/diary_logic.py b/ai_diplomacy/diary_logic.py index 407739d..e20c925 100644 --- a/ai_diplomacy/diary_logic.py +++ b/ai_diplomacy/diary_logic.py @@ -11,49 +11,90 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) - async def run_diary_consolidation( agent: "DiplomacyAgent", game: "Game", log_file_path: str, - entries_to_keep_unsummarized: int = 6, + years_to_keep_unsummarised: int = 1, prompts_dir: Optional[str] = None, ): """ Consolidate older diary entries while keeping recent ones. - This is the logic moved from the DiplomacyAgent class. + + Parameters + ---------- + agent : DiplomacyAgent + game : Game + log_file_path : str + years_to_keep_unsummarised : int, default 1 + Number of *distinct years* whose entries remain verbatim. + prompts_dir : Optional[str] """ - logger.info(f"[{agent.power_name}] CONSOLIDATION START — {len(agent.full_private_diary)} total full entries") + logger.info( + f"[{agent.power_name}] CONSOLIDATION START — " + f"{len(agent.full_private_diary)} total full entries" + ) - full_entries = [e for e in agent.full_private_diary if not e.startswith("[CONSOLIDATED HISTORY]")] + # Remove any earlier consolidated block first + full_entries = [ + e for e in agent.full_private_diary + if not e.startswith("[CONSOLIDATED HISTORY]") + ] - if len(full_entries) <= entries_to_keep_unsummarized: - agent.private_diary = list(agent.full_private_diary) - logger.info(f"[{agent.power_name}] ≤ {entries_to_keep_unsummarized} full entries — skipping consolidation") + if not full_entries: + agent.private_diary = [] + logger.warning(f"[{agent.power_name}] No diary entries found") return - boundary_entry = full_entries[-entries_to_keep_unsummarized] - match = re.search(r"\[[SFWRAB]\s*(\d{4})", boundary_entry) - if not match: - logger.error(f"[{agent.power_name}] Could not parse year from boundary entry; aborting consolidation") + # Extract years by scanning from newest to oldest + year_re = re.compile(r"\[[SFWRAB]\s*(\d{4})") # matches “[S1901”, “[F1902”…” + recent_years: list[int] = [] + + for entry in reversed(full_entries): # newest last + match = year_re.search(entry) + if not match: + # Lines without a year tag are considered “dateless”; keep them + continue + yr = int(match.group(1)) + if yr not in recent_years: + recent_years.append(yr) + if len(recent_years) >= years_to_keep_unsummarised: + break + + # If every distinct year falls inside the keep-window, skip consolidation + all_years = { + int(m.group(1)) + for e in full_entries + if (m := year_re.search(e)) + } + if len(all_years - set(recent_years)) == 0: agent.private_diary = list(agent.full_private_diary) + logger.info( + f"[{agent.power_name}] ≤ {years_to_keep_unsummarised} distinct years " + "— skipping consolidation" + ) return - cutoff_year = int(match.group(1)) - logger.info(f"[{agent.power_name}] Cut-off year for consolidation: {cutoff_year}") + # Partition entries + keep_set = set(recent_years) - def _entry_year(entry: str) -> int | None: - m = re.search(r"\[[SFWRAB]\s*(\d{4})", entry) + def _entry_year(entry: str) -> Optional[int]: + m = year_re.search(entry) return int(m.group(1)) if m else None - entries_to_summarize = [e for e in full_entries if (_entry_year(e) is not None and _entry_year(e) < cutoff_year)] - entries_to_keep = [e for e in full_entries if (_entry_year(e) is None or _entry_year(e) >= cutoff_year)] + entries_to_keep = [e for e in full_entries if (_entry_year(e) in keep_set)] + entries_to_summarise = [e for e in full_entries if (_entry_year(e) not in keep_set)] - logger.info(f"[{agent.power_name}] Summarising {len(entries_to_summarize)} entries; keeping {len(entries_to_keep)} recent entries verbatim") + logger.info( + f"[{agent.power_name}] Summarising {len(entries_to_summarise)} entries " + f"from years < {min(keep_set)}; keeping {len(entries_to_keep)} recent entries verbatim" + ) - if not entries_to_summarize: + if not entries_to_summarise: agent.private_diary = list(agent.full_private_diary) - logger.warning(f"[{agent.power_name}] No eligible entries to summarise; context diary left unchanged") + logger.warning( + f"[{agent.power_name}] No eligible entries to summarise; context diary left unchanged" + ) return prompt_template = load_prompt("diary_consolidation_prompt.txt", prompts_dir=prompts_dir) @@ -63,7 +104,7 @@ async def run_diary_consolidation( prompt = prompt_template.format( power_name=agent.power_name, - full_diary_text="\n\n".join(entries_to_summarize), + full_diary_text="\n\n".join(entries_to_summarise), ) raw_response = "" @@ -71,7 +112,6 @@ async def run_diary_consolidation( consolidation_client = None try: consolidation_client = agent.client - raw_response = await run_llm_and_log( client=consolidation_client, prompt=prompt, @@ -87,14 +127,21 @@ async def run_diary_consolidation( new_summary_entry = f"[CONSOLIDATED HISTORY] {consolidated_text}" agent.private_diary = [new_summary_entry] + entries_to_keep success_flag = "TRUE" - logger.info(f"[{agent.power_name}] Consolidation complete — {len(agent.private_diary)} context entries now") + logger.info( + f"[{agent.power_name}] Consolidation complete — " + f"{len(agent.private_diary)} context entries now" + ) except Exception as exc: logger.error(f"[{agent.power_name}] Diary consolidation failed: {exc}", exc_info=True) finally: log_llm_response( log_file_path=log_file_path, - model_name=(consolidation_client.model_name if consolidation_client is not None else agent.client.model_name), + model_name=( + consolidation_client.model_name + if consolidation_client is not None + else agent.client.model_name + ), power_name=agent.power_name, phase=game.current_short_phase, response_type="diary_consolidation", diff --git a/ai_diplomacy/game_history.py b/ai_diplomacy/game_history.py index c0bf78e..ef51c14 100644 --- a/ai_diplomacy/game_history.py +++ b/ai_diplomacy/game_history.py @@ -182,7 +182,7 @@ class GameHistory: eng2code = {"AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA", "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR"} norm = game.map.norm - out_lines = ["**ORDER HISTORY (Recent Rounds)**"] + out_lines = [] for ph in phases_to_report: if not (ph.orders_by_power or ph.submitted_orders_by_power): @@ -234,8 +234,14 @@ class GameHistory: tag = "bounce" elif "void" == tag: tag = "void: no effect" - - out_lines.append(f" {order} ({tag})") + + # don't show (success) tag for hold moves, it might be causing convergence on + # always-hold behaviour + is_hold = any(kw in order.upper() for kw in (" H", " HOLD")) + if tag == "success" and is_hold: + out_lines.append(f" {order}") + else: + out_lines.append(f" {order} ({tag})") seen_ok.add(_norm_keep(order)) # 2️⃣ invalid submissions @@ -246,6 +252,139 @@ class GameHistory: return "\n(No orders were issued in recent history)\n" return "\n".join(out_lines) + def get_orders_history_for_phase( + self, + game: "Game", + phase_name: str, # ← the single phase we want + ) -> Dict[str, Dict[str, List[Dict[str, str]]]]: + """ + Return the orders for `phase_name` as: + + { + "": { + "": [ + {"order": "", "result": ""}, + ... + ], + ... + }, + ... + } + + Order types: move, hold, support, convoy, build, disband, waive, other. + """ + + # ── locate the requested phase ────────────────────────────── + target_phase = next((p for p in self.phases if p.name == phase_name), None) + if not target_phase or not (target_phase.orders_by_power or target_phase.submitted_orders_by_power): + return {} + + # ── helpers ─────────────────────────────────────────────── + def _scalar(res): + """Flatten lists/dicts to a single outcome token.""" + tag = res + while isinstance(tag, list): + tag = tag[0] if tag else "" + if isinstance(tag, dict): + tag = tag.get("outcome") or tag.get("result") or "" + return str(tag).strip().lower() + + def _order_type(order: str) -> str: + o = order.upper() + if o == "WAIVE": + return "waive" + if " H" in o or " HOLD" in o: + return "hold" + if " S " in o: + return "support" + if " C " in o: + return "convoy" + if " R " in o: + return "retreat" + if " - " in o: + return "move" + if " BUILD" in o or o.endswith(" B") or " B " in o: + return "build" + if " DISBAND" in o or o.endswith(" D") or " D " in o: + return "disband" + return "other" + + # engine fallback + engine_phases = {ph.name: ph for ph in getattr(game, "get_phase_history", lambda: [])()} + eng2code = { + "AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA", + "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR", + } + norm = game.map.norm + + orders_by_power = defaultdict(lambda: defaultdict(list)) + + # iterate powers present in this phase + for pwr in sorted(set(target_phase.orders_by_power) | set(target_phase.submitted_orders_by_power)): + submitted = target_phase.submitted_orders_by_power.get(pwr, []) + accepted = target_phase.orders_by_power.get(pwr, []) + + if isinstance(submitted, str): + submitted = [submitted] + if isinstance(accepted, str): + accepted = [accepted] + + def _norm_keep(o): + return o if o.upper() == "WAIVE" else norm(o) + + sub_norm = {_norm_keep(o): o for o in submitted} + acc_norm = {_norm_keep(o): o for o in accepted} + + # outcome source + raw_res = target_phase.results_by_power.get(pwr) or target_phase.results_by_power or {} + if not raw_res: + eng = engine_phases.get(target_phase.name) + if eng and hasattr(eng, "order_results"): + key = next((k for k, v in eng2code.items() if v == pwr), None) + raw_res = (eng.order_results or {}).get(key, {}) + + seen_ok = set() + + # accepted orders + for idx, order in enumerate(accepted): + if isinstance(raw_res, dict): + res_raw = raw_res.get(order) or raw_res.get(" ".join(order.split()[:2])) + elif isinstance(raw_res, list) and idx < len(raw_res): + res_raw = raw_res[idx] + else: + res_raw = "" + + tag = _scalar(res_raw) + if not tag or tag == "ok": + tag = "success" + elif "bounce" in tag: + tag = "bounce" + elif "void" == tag: + tag = "void: no effect" + + result_field = tag + + orders_by_power[pwr][_order_type(order)].append( + {"order": order, "result": result_field} + ) + seen_ok.add(_norm_keep(order)) + + # invalid submissions + for k in sorted(set(sub_norm) - seen_ok): + order_str = sub_norm[k] + orders_by_power[pwr][_order_type(order_str)].append( + {"order": order_str, "result": "invalid"} + ) + + # convert nested defaultdicts to regular dicts + return { + pwr: {otype: lst for otype, lst in type_map.items()} + for pwr, type_map in orders_by_power.items() + } + + + + def get_messages_this_round(self, power_name: str, current_phase_name: str) -> str: current_phase: Optional[Phase] = None for phase_obj in self.phases: diff --git a/ai_diplomacy/game_logic.py b/ai_diplomacy/game_logic.py index 9f893b2..31b09f5 100644 --- a/ai_diplomacy/game_logic.py +++ b/ai_diplomacy/game_logic.py @@ -133,7 +133,7 @@ def save_game_state( if year_val is not None and year_val > run_config.max_year: break - phase_name = phase_block["name"] + phase_name = phase_block["name"] # 3a. Re-attach anything we cached from a previous save. if phase_name in previous_phase_extras: @@ -151,12 +151,15 @@ def save_game_state( # ------------------------------------------------------------------- phase_block["config"] = cfg phase_block["state_agents"] = current_state_agents + phase_block["order_results"] = game_history.get_orders_history_for_phase( + game, completed_phase_name + ) # -------------------------------------------------------------- # # 4. Attach top-level metadata and write atomically. # # -------------------------------------------------------------- # saved_game["phase_summaries"] = getattr(game, "phase_summaries", {}) - saved_game["final_agent_states"] = {p_name: {"relationships": a.relationships, "goals": a.goals} for p_name, a in agents.items()} + saved_game["final_agent_states"] = {p_name: {"relationships": a.relationships, "goals": a.goals} for p_name, a in agents.items()} # Filter out phases > max_year # saved_game["phases"] = [ @@ -210,8 +213,8 @@ def load_game_state( last_phase = saved_game_data["phases"][-1] # Wipe the data that must be regenerated **but preserve the keys** - last_phase["orders"] = {} # was dict - last_phase["results"] = {} # was dict + last_phase["orders"] = {} + last_phase["results"] = {} last_phase["messages"] = [] game = from_saved_game_format(saved_game_data) diff --git a/ai_diplomacy/initialization.py b/ai_diplomacy/initialization.py index 0bdce77..d8cb30e 100644 --- a/ai_diplomacy/initialization.py +++ b/ai_diplomacy/initialization.py @@ -158,7 +158,7 @@ async def initialize_agent_state_ext( # Fallback if LLM data was not applied or parsing failed if not initial_goals_applied: if not agent.goals: # Only set defaults if no goals were set during agent construction or by LLM - agent.goals = ["Survive and expand", "Form beneficial alliances", "Secure key territories"] + agent.goals = [] agent.add_journal_entry(f"[{current_phase}] Set default initial goals as LLM provided none or parse failed.") logger.info(f"[{power_name}] Default goals set.") @@ -180,7 +180,7 @@ async def initialize_agent_state_ext( success_status = f"Failure: Exception ({type(e).__name__})" # Fallback logic for goals/relationships if not already set by earlier fallbacks if not agent.goals: - agent.goals = ["Survive and expand", "Form beneficial alliances", "Secure key territories"] + agent.goals = [] logger.info(f"[{power_name}] Set fallback goals after top-level error: {agent.goals}") if not agent.relationships or all(r == "Neutral" for r in agent.relationships.values()): agent.relationships = {p: "Neutral" for p in ALL_POWERS if p != power_name} diff --git a/ai_diplomacy/negotiations.py b/ai_diplomacy/negotiations.py index 3259337..524cfaf 100644 --- a/ai_diplomacy/negotiations.py +++ b/ai_diplomacy/negotiations.py @@ -31,6 +31,9 @@ async def conduct_negotiations( Conducts a round-robin conversation among all non-eliminated powers. Each power can send up to 'max_rounds' messages, choosing between private and global messages each turn. Uses asyncio for concurrent message generation. + + NEW: Prevents a power from sending a private message to the same recipient + in two consecutive rounds if that recipient has not replied yet. """ logger.info("Starting negotiation phase.") @@ -43,6 +46,11 @@ async def conduct_negotiations( else: logger.info("No eliminated powers yet.") + # ── new tracking for consecutive private messages ─────────────── + last_sent_round: Dict[tuple[str, str], int] = {} + awaiting_reply: Dict[tuple[str, str], bool] = {} + # ──────────────────────────────────────────────────────────────── + # We do up to 'max_rounds' single-message turns for each power for round_index in range(max_rounds): logger.info(f"Negotiation Round {round_index + 1}/{max_rounds}") @@ -99,14 +107,13 @@ async def conduct_negotiations( if isinstance(result, Exception): logger.error(f"Error getting conversation reply for {power_name}: {result}", exc_info=result) - # Use model_name for stats key if possible if model_name in model_error_stats: model_error_stats[model_name]["conversation_errors"] += 1 - else: # Fallback to power_name if model name not tracked (shouldn't happen) + else: model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0) model_error_stats[power_name]["conversation_errors"] += 1 - messages = [] # Treat as no messages on error - elif result is None: # Handle case where client might return None on internal error + messages = [] + elif result is None: logger.warning(f"Received None instead of messages for {power_name}.") messages = [] if model_name in model_error_stats: @@ -115,48 +122,65 @@ async def conduct_negotiations( model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0) model_error_stats[power_name]["conversation_errors"] += 1 else: - messages = result # result is the list of message dicts + messages = result logger.debug(f"Received {len(messages)} message(s) from {power_name}.") - # Process the received messages (same logic as before) - if messages: - for message in messages: - # Validate message structure - if not isinstance(message, dict) or "content" not in message: - logger.warning(f"Invalid message format received from {power_name}: {message}. Skipping.") - continue - - # Create an official message in the Diplomacy engine - # Determine recipient based on message type - if message.get("message_type") == "private": - recipient = normalize_recipient_name(message.get("recipient", GLOBAL)) # Default to GLOBAL if recipient missing somehow - if recipient not in game.powers and recipient != GLOBAL: - logger.warning(f"Invalid recipient '{recipient}' in message from {power_name}. Sending globally.") - recipient = GLOBAL # Fallback to GLOBAL if recipient power is invalid - else: # Assume global if not private or type is missing - recipient = GLOBAL - - diplo_message = Message( - phase=game.current_short_phase, - sender=power_name, - recipient=recipient, # Use determined recipient - message=message.get("content", ""), # Use .get for safety - time_sent=None, # Let the engine assign time - ) - game.add_message(diplo_message) - # Also add to our custom history - game_history.add_message( - game.current_short_phase, - power_name, - recipient, # Use determined recipient here too - message.get("content", ""), # Use .get for safety - ) - journal_recipient = f"to {recipient}" if recipient != GLOBAL else "globally" - agent.add_journal_entry(f"Sent message {journal_recipient} in {game.current_short_phase}: {message.get('content', '')[:100]}...") - logger.info(f"[{power_name} -> {recipient}] {message.get('content', '')[:100]}...") - else: + if not messages: logger.debug(f"No valid messages returned or error occurred for {power_name}.") - # Error stats handled above based on result type + continue + + for message in messages: + if not isinstance(message, dict) or "content" not in message: + logger.warning(f"Invalid message format received from {power_name}: {message}. Skipping.") + continue + + # Determine recipient + if message.get("message_type") == "private": + recipient = normalize_recipient_name(message.get("recipient", GLOBAL)) + if recipient not in game.powers and recipient != GLOBAL: + logger.warning(f"Invalid recipient '{recipient}' in message from {power_name}. Sending globally.") + recipient = GLOBAL + else: + recipient = GLOBAL + + # ── repetition guard for private messages ───────────── + if recipient != GLOBAL: + pair = (power_name, recipient) + if awaiting_reply.get(pair, False) and last_sent_round.get(pair) == round_index - 1: + logger.info( + f"Discarding repeat private message from {power_name} to {recipient} " + f"(waiting for reply since last round)." + ) + continue # skip this message + + # record outbound and set waiting flag + last_sent_round[pair] = round_index + awaiting_reply[pair] = True + # recipient has now been contacted; when they respond, we'll clear the flag for the reverse pair + awaiting_reply[(recipient, power_name)] = False + # ───────────────────────────────────────────────────── + + diplo_message = Message( + phase=game.current_short_phase, + sender=power_name, + recipient=recipient, + message=message.get("content", ""), + time_sent=None, + ) + game.add_message(diplo_message) + game_history.add_message( + game.current_short_phase, + power_name, + recipient, + message.get("content", ""), + ) + journal_recipient = f"to {recipient}" if recipient != GLOBAL else "globally" + agent.add_journal_entry( + f"Sent message {journal_recipient} in {game.current_short_phase}: " + f"{message.get('content', '')[:100]}..." + ) + logger.info(f"[{power_name} -> {recipient}] {message.get('content', '')[:100]}...") logger.info("Negotiation phase complete.") return game_history + diff --git a/ai_diplomacy/prompt_constructor.py b/ai_diplomacy/prompt_constructor.py index cd34f50..f0c523d 100644 --- a/ai_diplomacy/prompt_constructor.py +++ b/ai_diplomacy/prompt_constructor.py @@ -214,14 +214,10 @@ def construct_order_generation_prompt( include_messages=not _use_simple, # include only when *not* simple ) - # Append goals at the end for focus - goals_section = "" - if agent_goals: - goals_section = ( - "\n\nYOUR STRATEGIC GOALS:\n" + "\n".join(f"- {g}" for g in agent_goals) + "\n\nKeep these goals in mind when choosing your orders." - ) + # delete unused section from context: + context = context.replace('Messages This Round\n\n\nEnd Messages', '') - final_prompt = system_prompt + "\n\n" + context + "\n\n" + instructions + goals_section + final_prompt = system_prompt + "\n\n" + context + "\n\n" + instructions # Make the power names more LLM friendly final_prompt = ( diff --git a/ai_diplomacy/prompts/phase_result_diary_prompt.txt b/ai_diplomacy/prompts/phase_result_diary_prompt.txt index 1cb94ed..906633a 100644 --- a/ai_diplomacy/prompts/phase_result_diary_prompt.txt +++ b/ai_diplomacy/prompts/phase_result_diary_prompt.txt @@ -17,9 +17,6 @@ YOUR RELATIONSHIPS BEFORE THIS PHASE YOUR GOALS {agent_goals} -YOUR ACTUAL ORDERS -{your_actual_orders} - TASK Analyze what actually happened this phase compared to negotiations and expectations. diff --git a/ai_diplomacy/prompts_simple/context_prompt.txt b/ai_diplomacy/prompts_simple/context_prompt.txt index aa7612a..d8c8430 100644 --- a/ai_diplomacy/prompts_simple/context_prompt.txt +++ b/ai_diplomacy/prompts_simple/context_prompt.txt @@ -8,11 +8,11 @@ Phase: {current_phase} Note: You can only build units in your home centers if they are empty. If you lose control of a home center, you cannot build units there, so holding them is critical. # Player Status -Current Goals: {agent_goals} -Relationships: {agent_relationships} +Current Goals: +{agent_goals} -# Recent Private Diary Entries (Your inner thoughts and plans): -{agent_private_diary} +# Relationships: +{agent_relationships} # Order History {order_history} @@ -28,6 +28,9 @@ Possible Orders For {current_phase} {possible_orders} End Possible Orders +# Recent Private Diary Entries (Your inner thoughts and plans): +{agent_private_diary} + Messages This Round {messages_this_round} End Messages \ No newline at end of file diff --git a/ai_diplomacy/prompts_simple/diary_consolidation_prompt.txt b/ai_diplomacy/prompts_simple/diary_consolidation_prompt.txt index 9cc34d7..6538041 100644 --- a/ai_diplomacy/prompts_simple/diary_consolidation_prompt.txt +++ b/ai_diplomacy/prompts_simple/diary_consolidation_prompt.txt @@ -4,24 +4,15 @@ Your Power: {power_name} GAME CONTEXT You are playing Diplomacy, a strategic board game set in pre-WWI Europe. Seven powers compete for control by conquering supply centers. Victory requires 18 supply centers. -Key game mechanics: -- Spring (S) and Fall (F) movement phases where armies/fleets move -- Fall phases include builds/disbands based on supply center control -- Units can support, convoy, or attack -- All orders resolve simultaneously -- Success often requires negotiated coordination with other powers - FULL DIARY HISTORY {full_diary_text} TASK -Create a comprehensive consolidated summary of the most important parts of this diary history. It will serve as your long-term memory. +Create a concise consolidated summary of the most important parts of this diary history. It will serve as your long-term memory. Do not include anything that is not strategically or diplomatically useful going forward. Aim for 300 words. Prioritize the following: -1. **Recent Events, Goals & Intentions** -2. **Long-Term Strategy:** Enduring goals, rivalries, and alliances that are still relevant. -3. **Key Historical Events:** Major betrayals, decisive battles, and significant turning points that shape the current diplomatic landscape. -4. **Important Notes:** Any notes you deem important from the history not already included. +1. **Key Historical Diplomatic Events:** Prioritise both *strategically impactful* and *recent* events. +2. **Information that has ongoing importance & usefulness** RESPONSE FORMAT Return ONLY the consolidated summary text. Do not include JSON, formatting markers, or meta-commentary. \ No newline at end of file diff --git a/ai_diplomacy/prompts_simple/negotiation_diary_prompt.txt b/ai_diplomacy/prompts_simple/negotiation_diary_prompt.txt index e4b824e..377ee86 100644 --- a/ai_diplomacy/prompts_simple/negotiation_diary_prompt.txt +++ b/ai_diplomacy/prompts_simple/negotiation_diary_prompt.txt @@ -2,35 +2,45 @@ NEGOTIATION SUMMARY REQUEST Power: {power_name} Phase: {current_phase} -MESSAGES THIS ROUND -{messages_this_round} - -CURRENT STATUS -Goals: +Goals (may need updating): {agent_goals} -Relationships: +Relationships (may need updating): {agent_relationships} Game State: {board_state_str} +Private Diary: +{private_diary_summary} + +Messages This Round: +{messages_this_round} + + + TASK Analyze the negotiations, goals, relationships, and game state to: -1. Summarize key outcomes and agreements -2. State your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them. +1. Summarize key outcomes and agreements concisely +2. Concisely state your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them. 3. Update relationships as needed (Enemy, Unfriendly, Neutral, Friendly, Ally) -4. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary. +4. Include your latest overarching goals (including any updates) +5. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary. RESPONSE FORMAT Return ONLY a JSON object with this structure: -{ -"negotiation_summary": "Key outcomes from negotiations", -"intent": "Specific intent for upcoming orders", -"updated_relationships": { -"POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally" -} -} +{{ + "negotiation_summary": "Key outcomes from negotiations", + "intent": "Specific intent for upcoming orders this phase", + "updated_relationships": {{ + "POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally" + }}, + "goals": [ + "goal 1", + "goal 2", + ... + ] +}} Reminder: If you need to quote something, only use single quotes in the actual messages so as not to interfere with the JSON structure. \ No newline at end of file diff --git a/ai_diplomacy/prompts_simple/phase_result_diary_prompt.txt b/ai_diplomacy/prompts_simple/phase_result_diary_prompt.txt index 1cb94ed..b992ed8 100644 --- a/ai_diplomacy/prompts_simple/phase_result_diary_prompt.txt +++ b/ai_diplomacy/prompts_simple/phase_result_diary_prompt.txt @@ -1,7 +1,13 @@ PHASE RESULT ANALYSIS -Power: {power_name} +Your Power: {power_name} Phase: {current_phase} +RECENT DIARY ENTRIES +{formatted_diary} + +BOARD STATE +{board_state} + PHASE SUMMARY {phase_summary} @@ -17,9 +23,6 @@ YOUR RELATIONSHIPS BEFORE THIS PHASE YOUR GOALS {agent_goals} -YOUR ACTUAL ORDERS -{your_actual_orders} - TASK Analyze what actually happened this phase compared to negotiations and expectations. @@ -29,12 +32,12 @@ Consider: 3. SURPRISES: What unexpected moves occurred? 4. IMPACT: How did these events affect your strategic position? -Write a reflective diary entry (150-250 words) that: -- Identifies key betrayals or successful collaborations -- Assesses impact on your position -- Updates your understanding of other powers' trustworthiness -- Notes strategic lessons learned -- Adjusts your perception of threats and opportunities +Write a concise diary entry (100-150 words) of the most important things you would like to remember, e.g.: +- Key betrayals or successful collaborations +- Assess impact on your position +- Update your understanding of other powers' trustworthiness +- Strategic lessons learned +- Moves that failed, and ideas on how to avoid the error in the future Focus on concrete events and their implications for your future strategy. diff --git a/analysis/statistical_game_analysis.py b/analysis/statistical_game_analysis.py index a9a65cd..98f645e 100644 --- a/analysis/statistical_game_analysis.py +++ b/analysis/statistical_game_analysis.py @@ -69,6 +69,12 @@ class StatisticalGameAnalyzer: 'order_generation', 'order_diary', 'state_update_parsing_empty_or_invalid_data', 'diary_consolidation', 'state_update_partial_data', 'state_update_no_response' ] + + ORDER_TYPES = [ + "move", "hold", "support", "convoy", + "build", "disband", "waive", "other", + "retreat" + ] def __init__(self): """Initialize analyzer with configuration constants.""" @@ -234,6 +240,103 @@ class StatisticalGameAnalyzer: return responses + def _extract_order_results_features(self, power: str, phase_data: dict) -> dict: + """ + Count orders and outcomes for a single power in one phase and add + a success-rate (0-1) for every order type. + """ + features: dict[str, float | int] = {} + for ot in self.ORDER_TYPES: + plural = f"{ot}s" if not ot.endswith("s") else ot + for metric in ("total", "success", "bounce", "void", "invalid"): + features[f"orders_{plural}_{metric}"] = 0 + features[f"orders_{plural}_success_rate"] = 0.0 # ← new + + orders_by_type = phase_data.get("order_results", {}).get(power, {}) + if not orders_by_type: + return features + + for otype, order_list in orders_by_type.items(): + otype = otype.lower() + if otype not in self.ORDER_TYPES: + otype = "other" + plural = f"{otype}s" if not otype.endswith("s") else otype + + for entry in order_list: + result = str(entry.get("result", "")).lower().strip() + key_base = f"orders_{plural}" + features[f"{key_base}_total"] += 1 + match result: + case "success": + features[f"{key_base}_success"] += 1 + case "bounce": + features[f"{key_base}_bounce"] += 1 + case "invalid": + features[f"{key_base}_invalid"] += 1 + case _ if result in ("void", "void: no effect", ""): + features[f"{key_base}_void"] += 1 + + # ── derive success rates ── + for ot in self.ORDER_TYPES: + plural = f"{ot}s" if not ot.endswith("s") else ot + succ = features[f"orders_{plural}_success"] + tot = features[f"orders_{plural}_total"] + features[f"orders_{plural}_success_rate"] = succ / tot if tot else 0.0 + + return features + + + + # ────────────────── GAME-LEVEL ORDER TOTALS ────────────────── + def _aggregate_order_results(self, power: str, game_data: dict) -> dict: + """ + Sum every order-type/result pair over *all* phases for one power + and add success-rate (0-1) columns. + """ + totals: dict[str, float | int] = {} + for ot in self.ORDER_TYPES: + plural = f"{ot}s" if not ot.endswith("s") else ot + for metric in ("total", "success", "bounce", "void", "invalid"): + totals[f"orders_{plural}_{metric}"] = 0 + totals[f"orders_{plural}_success_rate"] = 0.0 # ← new + + for phase in game_data.get("phases", []): + orders_by_type = phase.get("order_results", {}).get(power, {}) + if not orders_by_type: + continue + + for otype, order_list in orders_by_type.items(): + otype = otype.lower() + if otype not in self.ORDER_TYPES: + otype = "other" + plural = f"{otype}s" if not otype.endswith("s") else otype + + for entry in order_list: + result = str(entry.get("result", "")).lower().strip() + key_base = f"orders_{plural}" + totals[f"{key_base}_total"] += 1 + match result: + case "success": + totals[f"{key_base}_success"] += 1 + case "bounce": + totals[f"{key_base}_bounce"] += 1 + case "invalid": + totals[f"{key_base}_invalid"] += 1 + case _ if result in ("void", "void: no effect", ""): + totals[f"{key_base}_void"] += 1 + + # ── derive success rates ── + for ot in self.ORDER_TYPES: + plural = f"{ot}s" if not ot.endswith("s") else ot + succ = totals[f"orders_{plural}_success"] + tot = totals[f"orders_{plural}_total"] + totals[f"orders_{plural}_success_rate"] = succ / tot if tot else 0.0 + + return totals + + + + def _extract_phase_features(self, llm_responses: List[dict], game_data: dict) -> List[dict]: """Extract phase-level features for all powers, phases, and response types.""" phase_features = [] @@ -294,6 +397,10 @@ class StatisticalGameAnalyzer: # === FAILURE ANALYSIS (HARD MODE) === failure_metrics = self._analyze_failures(power, phase, response_type, llm_responses) features.update(failure_metrics) + + # === ORDER-RESULT METRICS === + order_result_features = self._extract_order_results_features(power, phase_data) + features.update(order_result_features) # Add response-type specific features @@ -794,7 +901,10 @@ class StatisticalGameAnalyzer: if total_calls > 0: features['overall_failure_rate_percentage'] = (total_failures / total_calls) * 100.0 features['overall_success_rate_percentage'] = (total_successes / total_calls) * 100.0 - + + # === ORDER TOTALS (whole game) === + order_totals = self._aggregate_order_results(power, game_data) + features.update(order_totals) # Helper methods @@ -1067,6 +1177,15 @@ class StatisticalGameAnalyzer: 'military_units_gained_vs_prev_phase', 'relationships' ] + + # ensure order columns + for ot in self.ORDER_TYPES: + plural = f"{ot}s" if not ot.endswith("s") else ot + for suffix in ("total", "success", "bounce", "void", "invalid", "success_rate"): + col = f"orders_{plural}_{suffix}" + if col not in fieldnames: + fieldnames.append(col) + # Ensure all actual fields are included (in case we missed any) actual_fields = set() @@ -1140,6 +1259,17 @@ class StatisticalGameAnalyzer: # === Diplobench style single scalar game score === 'game_score', ] + + # ensure order-total columns + for ot in self.ORDER_TYPES: + plural = f"{ot}s" if not ot.endswith("s") else ot + base = f"orders_{plural}_total" + for suffix in ("total", "success", "bounce", "void", "invalid", "success_rate"): + col = f"orders_{plural}_{suffix}" + if col not in fieldnames: + fieldnames.append(col) + + # Ensure all actual fields are included actual_fields = set() diff --git a/experiment_runner/analysis/statistical_game_analysis.py b/experiment_runner/analysis/statistical_game_analysis.py index 5ec0ca4..cbe083f 100644 --- a/experiment_runner/analysis/statistical_game_analysis.py +++ b/experiment_runner/analysis/statistical_game_analysis.py @@ -227,6 +227,8 @@ def _plot_relationships_per_game( # ── NEW: discard rows with no relationship info ──────────── game_df = game_df[game_df["rel_dict"].apply(bool)] + # ── keep only MOVE phases; drop retreat (R) and adjustment (A) ───── + game_df = game_df[game_df["game_phase"].str.upper().str.endswith("M")] if game_df.empty: # nothing left to plot continue @@ -334,9 +336,18 @@ def _plot_relationships_per_game( else to_rgba(base_colour, alpha=0.35) ) + # ── “double” a lone point so it shows up as a short flat line ── + finite_pts = [(x, y) for x, y in zip(data["x"], y_off) if not math.isnan(y)] + if len(finite_pts) == 1: + x0, y0 = finite_pts[0] + xs = [x0 - 0.05, x0 + 0.05] # tiny horizontal spread + ys = [y0, y0] + else: + xs, ys = data["x"], y_off + plt.plot( - data["x"], - y_off, + xs, + ys, label=f"{other} ({kind})", color=colour, linewidth=2, diff --git a/experiment_runner/analysis/summary.py b/experiment_runner/analysis/summary.py index e26a687..b8e4f12 100644 --- a/experiment_runner/analysis/summary.py +++ b/experiment_runner/analysis/summary.py @@ -169,6 +169,7 @@ def run(exp_dir: Path, ctx: dict): # pylint: disable=unused-argument sns.set_style("whitegrid") plt.figure(figsize=(10, 7)) sns.boxplot(x="Power", y="SupplyCenters", data=df, palette="pastel") + plt.ylim(0, 18) plt.title("Supply-center distribution") plt.savefig(analysis_dir / "results_summary.png", dpi=150) plt.close() diff --git a/lm_game.py b/lm_game.py index 2a30f1a..7407f61 100644 --- a/lm_game.py +++ b/lm_game.py @@ -334,6 +334,17 @@ async def main(): if neg_diary_tasks: await asyncio.gather(*neg_diary_tasks, return_exceptions=True) + # Diary Consolidation + if current_short_phase.startswith("S") and current_short_phase.endswith("M"): + consolidation_tasks = [ + run_diary_consolidation(agent, game, llm_log_file_path, + prompts_dir=agent.prompts_dir) + for agent in agents.values() + if not game.powers[agent.power_name].is_eliminated() + ] + if consolidation_tasks: + await asyncio.gather(*consolidation_tasks, return_exceptions=True) + # --- 4c. Order Generation --- logger.info("Getting orders from agents...") board_state = game.get_state() @@ -350,7 +361,7 @@ async def main(): game, agent.client, board_state, power_name, possible_orders, game_history, model_error_stats, agent_goals=agent.goals, agent_relationships=agent.relationships, - agent_private_diary_str=agent.format_private_diary_for_prompt(), + agent_private_diary_str=agent.get_latest_phase_diary_entries(), # only include latest phase in orders prompt log_file_path=llm_log_file_path, phase=current_phase, ) ) @@ -378,10 +389,11 @@ async def main(): submitted_orders_this_phase[p_name] = valid + invalid # diary entry only for the orders we tried to submit - if valid or invalid: - await agents[p_name].generate_order_diary_entry( - game, valid + invalid, llm_log_file_path - ) + if False: # disabled for now + if valid or invalid: + await agents[p_name].generate_order_diary_entry( + game, valid + invalid, llm_log_file_path + ) # --- 4d. Process Phase --- completed_phase = current_phase @@ -414,26 +426,18 @@ async def main(): all_orders_this_phase = game.order_history.get(current_short_phase, {}) # Phase Result Diary Entries - phase_result_diary_tasks = [ - agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path) - for agent in agents.values() if not game.powers[agent.power_name].is_eliminated() - ] - if phase_result_diary_tasks: - await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True) - - # Diary Consolidation - if current_short_phase.startswith("S") and current_short_phase.endswith("M"): - consolidation_tasks = [ - run_diary_consolidation(agent, game, llm_log_file_path, - prompts_dir=agent.prompts_dir) - for agent in agents.values() - if not game.powers[agent.power_name].is_eliminated() + if current_short_phase.endswith("M"): + phase_result_diary_tasks = [ + agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path, current_short_phase) + for agent in agents.values() if not game.powers[agent.power_name].is_eliminated() ] - if consolidation_tasks: - await asyncio.gather(*consolidation_tasks, return_exceptions=True) + if phase_result_diary_tasks: + await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True) + + # Agent State Updates - if current_short_phase.endswith("M"): + if current_short_phase.endswith("M") and run_config.num_negotiation_rounds == 0: # r'ships are updated in negotiation round. otherwise in no press, updated in a separate step. current_board_state = game.get_state() state_update_tasks = [ agent.analyze_phase_and_update_state(game, current_board_state, phase_summary, game_history, llm_log_file_path)