state update fixes & streamline prompts

2026-04-19 12:58:09 +00:00 · 2025-07-12 10:17:17 +10:00 · 2025-07-12 10:17:17 +10:00 · b4a56126ec
commit b4a56126ec
parent 1f154a7073
17 changed files with 710 additions and 281 deletions
--- a/ai_diplomacy/agent.py
+++ b/ai_diplomacy/agent.py
@ -97,6 +97,26 @@ class DiplomacyAgent:
        logger.info(f"Initialized DiplomacyAgent for {self.power_name} with goals: {self.goals}")
        self.add_journal_entry(f"Agent initialized. Initial Goals: {self.goals}")
    def _format_board_state(self, board_state_dict):
        """
        Render a board-state dict as an indented, human-readable text block.

        Args:
            board_state_dict: Mapping that may hold 'units' and 'centers',
                each mapping a power name to an iterable of strings.
                A missing key is treated as an empty mapping.

        Returns:
            A newline-joined string: a "Units:" header followed by one line
            per power (sorted by name), then a "Centers:" header with the
            same layout. Any power whose 'centers' entry is empty is
            labelled "(Eliminated)" in both sections.
        """
        unit_map = board_state_dict.get('units', {})
        center_map = board_state_dict.get('centers', {})

        # Powers listed under 'centers' with nothing owned get the marker.
        no_centers = set()
        for name, owned in center_map.items():
            if not owned:
                no_centers.add(name)

        def _section(title, mapping):
            # One header row, then one indented row per power in name order.
            rows = [title]
            for name in sorted(mapping):
                shown = name
                if name in no_centers:
                    shown = f"{name} (Eliminated)"
                rows.append(f"  {shown}: {', '.join(mapping[name])}")
            return rows

        return "\n".join(_section("Units:", unit_map) + _section("Centers:", center_map))
    def _extract_json_from_text(self, text: str) -> dict:
        """Extract and parse JSON from text, handling common LLM response formats."""
        if not text or not text.strip():
@ -368,6 +388,46 @@ class DiplomacyAgent:
            f"[{self.power_name}] DIARY ENTRY ADDED for {phase}. Total full entries: {len(self.full_private_diary)}. New entry: {entry[:100]}..."
        )
    def get_latest_phase_diary_entries(
        self,
        *,
        use_private_diary: bool = False,
        separator: str = "\n\n",
    ) -> str:
        """
        Collect the trailing run of diary entries that share the newest phase tag.

        Args:
            use_private_diary: When True read from self.private_diary;
                               when False (default) read from
                               self.full_private_diary.
            separator: Text inserted between the collected entries.

        Returns:
            The matching entries in chronological order joined by
            `separator`; the last entry alone if it carries no leading
            "[PHASE]" tag; or "" when the chosen diary is empty.
        """
        if use_private_diary:
            source: List[str] = self.private_diary
        else:
            source = self.full_private_diary

        if not source:
            return ""

        newest = source[-1]
        # Entries are expected to look like "[S1901M] text…"
        tag = re.match(r"\[([^\]]+)\]", newest)
        if tag is None:
            # No phase tag on the newest entry, so there is nothing to group by.
            return newest

        prefix = f"[{tag.group(1)}]"

        # Walk backwards to the first entry of the contiguous tail that
        # carries the same tag; earlier non-adjacent matches are ignored.
        start = len(source)
        while start > 0 and source[start - 1].startswith(prefix):
            start -= 1

        return separator.join(source[start:])
    def format_private_diary_for_prompt(self) -> str:
        """
        Formats the context diary for inclusion in a prompt.
@ -437,12 +497,12 @@ class DiplomacyAgent:
            # Prepare context for the prompt
            board_state_dict = game.get_state()
-            board_state_str = f"Units: {board_state_dict.get('units', {})}, Centers: {board_state_dict.get('centers', {})}"
+            board_state_str = self._format_board_state(board_state_dict)
            messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase)
            if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"):
                messages_this_round = (
-                    "(No messages involving your power this round that require deep reflection for diary. Focus on overall situation.)"
+                    "(No messages involving your power this round.)"
                )
            current_relationships_str = json.dumps(self.relationships)
@ -463,31 +523,34 @@ class DiplomacyAgent:
            # Do aggressive preprocessing of the template to fix the problematic patterns
            # This includes removing any newlines or whitespace before JSON keys that cause issues
-            for pattern in ["negotiation_summary", "updated_relationships", "relationship_updates", "intent"]:
+            if False:
-                # Fix the "\n  "key"" pattern that breaks .format()
+                for pattern in ["negotiation_summary", "updated_relationships", "relationship_updates", "intent"]:
-                prompt_template_content = re.sub(rf'\n\s*"{pattern}"', f'"{pattern}"', prompt_template_content)
+                    # Fix the "\n  "key"" pattern that breaks .format()
                    prompt_template_content = re.sub(rf'\n\s*"{pattern}"', f'"{pattern}"', prompt_template_content)
-            # Escape all curly braces in JSON examples to prevent format() from interpreting them
+                # Escape all curly braces in JSON examples to prevent format() from interpreting them
-            # First, temporarily replace the actual template variables
+                # First, temporarily replace the actual template variables
-            temp_vars = [
+            
-                "power_name",
+                temp_vars = [
-                "current_phase",
+                    "power_name",
-                "messages_this_round",
+                    "current_phase",
-                "agent_goals",
+                    "messages_this_round",
-                "agent_relationships",
+                    "agent_goals",
-                "board_state_str",
+                    "agent_relationships",
-                "ignored_messages_context",
+                    "board_state_str",
-            ]
+                    "ignored_messages_context",
-            for var in temp_vars:
+                    "private_diary_summary",
-                prompt_template_content = prompt_template_content.replace(f"{{{var}}}", f"<<{var}>>")
+                ]
                for var in temp_vars:
                    prompt_template_content = prompt_template_content.replace(f"{{{var}}}", f"<<{var}>>")
-            # Now escape all remaining braces (which should be JSON)
+                # Now escape all remaining braces (which should be JSON)
-            prompt_template_content = prompt_template_content.replace("{", "{{")
+                prompt_template_content = prompt_template_content.replace("{", "{{")
-            prompt_template_content = prompt_template_content.replace("}", "}}")
+                prompt_template_content = prompt_template_content.replace("}", "}}")
-            # Restore the template variables
+                # Restore the template variables
-            for var in temp_vars:
+                for var in temp_vars:
-                prompt_template_content = prompt_template_content.replace(f"<<{var}>>", f"{{{var}}}")
+                    prompt_template_content = prompt_template_content.replace(f"<<{var}>>", f"{{{var}}}")
            # Create a dictionary with safe values for formatting
            format_vars = {
@ -515,8 +578,6 @@ class DiplomacyAgent:
            logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...")
            logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...")
            raw_response = await run_llm_and_log(
                client=self.client,
                prompt=full_prompt,
@ -567,7 +628,6 @@ class DiplomacyAgent:
                        diary_text_candidate = parsed_data["intent"]
                    else:
                        diary_text_candidate += "\nIntent: " + parsed_data["intent"]
                if diary_text_candidate:
                    diary_entry_text = diary_text_candidate
                else:
@ -610,6 +670,10 @@ class DiplomacyAgent:
                elif new_relationships is not None:  # It was provided but not a dict
                    logger.warning(f"[{self.power_name}] 'updated_relationships' from diary LLM was not a dictionary: {type(new_relationships)}")
                # update goals
                if "goals" in parsed_data:
                    self.update_goals(parsed_data["goals"])
            # Add the generated (or fallback) diary entry
            self.add_diary_entry(diary_entry_text, game.current_short_phase)
            if relationships_updated:
@ -627,16 +691,19 @@ class DiplomacyAgent:
            self.add_diary_entry(f"(Error generating diary entry: {type(e).__name__})", game.current_short_phase)
        finally:
            if log_file_path:  # Ensure log_file_path is provided
-                log_llm_response(
+                try:
-                    log_file_path=log_file_path,
+                    log_llm_response(
-                    model_name=self.client.model_name if self.client else "UnknownModel",
+                        log_file_path=log_file_path,
-                    power_name=self.power_name,
+                        model_name=self.client.model_name if self.client else "UnknownModel",
-                    phase=game.current_short_phase if game else "UnknownPhase",
+                        power_name=self.power_name,
-                    response_type="negotiation_diary",  # Specific type for CSV logging
+                        phase=game.current_short_phase if game else "UnknownPhase",
-                    raw_input_prompt=full_prompt,
+                        response_type="negotiation_diary",  # Specific type for CSV logging
-                    raw_response=raw_response,
+                        raw_input_prompt=full_prompt,
-                    success=success_status,
+                        raw_response=raw_response,
-                )
+                        success=success_status,
                    )
                except Exception as e:
                    print(e)
    async def generate_order_diary_entry(self, game: "Game", orders: List[str], log_file_path: str):
        """
@ -783,105 +850,108 @@ class DiplomacyAgent:
        # Rest of the code remains the same
    async def generate_phase_result_diary_entry(
-        self, game: "Game", game_history: "GameHistory", phase_summary: str, all_orders: Dict[str, List[str]], log_file_path: str
+        self, game: "Game", game_history: "GameHistory", phase_summary: str, all_orders: Dict[str, List[str]], log_file_path: str, phase_name: str
    ):
        """
        Generates a diary entry analyzing the actual phase results,
        comparing them to negotiations and identifying betrayals/collaborations.
        """
        logger.info(f"[{self.power_name}] Generating phase result diary entry for {game.current_short_phase}...")
        # Load the template
        prompt_template = load_prompt("phase_result_diary_prompt.txt", prompts_dir=self.prompts_dir)
        if not prompt_template:
            logger.error(f"[{self.power_name}] Could not load phase_result_diary_prompt.txt. Skipping diary entry.")
            return
        # Format all orders for the prompt
        all_orders_formatted = ""
        for power, orders in all_orders.items():
            orders_str = ", ".join(orders) if orders else "No orders"
            all_orders_formatted += f"{power}: {orders_str}\n"
        # Get your own orders
        your_orders = all_orders.get(self.power_name, [])
        your_orders_str = ", ".join(your_orders) if your_orders else "No orders"
        # Get recent negotiations for this phase
        messages_this_phase = game_history.get_messages_by_phase(game.current_short_phase)
        your_negotiations = ""
        for msg in messages_this_phase:
            if msg.sender == self.power_name:
                your_negotiations += f"To {msg.recipient}: {msg.content}\n"
            elif msg.recipient == self.power_name:
                your_negotiations += f"From {msg.sender}: {msg.content}\n"
        if not your_negotiations:
            your_negotiations = "No negotiations this phase"
        # Format relationships
        relationships_str = "\n".join([f"{p}: {r}" for p, r in self.relationships.items()])
        # Format goals
        goals_str = "\n".join([f"- {g}" for g in self.goals]) if self.goals else "None"
        # Create the prompt
        prompt = prompt_template.format(
            power_name=self.power_name,
            current_phase=game.current_short_phase,
            phase_summary=phase_summary,
            all_orders_formatted=all_orders_formatted,
            your_negotiations=your_negotiations,
            pre_phase_relationships=relationships_str,
            agent_goals=goals_str,
            your_actual_orders=your_orders_str,
        )
        logger.debug(f"[{self.power_name}] Phase result diary prompt:\n{prompt[:500]}...")
        raw_response = ""
        success_status = "FALSE"
        try:
-            raw_response = await run_llm_and_log(
+            """
-                client=self.client,
+            Generates a diary entry analyzing the actual phase results,
-                prompt=prompt,
+            comparing them to negotiations and identifying betrayals/collaborations.
            """
            logger.info(f"[{self.power_name}] Generating phase result diary entry for {game.current_short_phase}...")
            # Load the template
            prompt_template = load_prompt("phase_result_diary_prompt.txt", prompts_dir=self.prompts_dir)
            if not prompt_template:
                logger.error(f"[{self.power_name}] Could not load phase_result_diary_prompt.txt. Skipping diary entry.")
                return
            # Format all orders for the prompt
            all_orders_formatted = game_history.get_order_history_for_prompt(
                game=game,  # Pass the game object for normalization
                power_name=self.power_name,
-                phase=game.current_short_phase,
+                current_phase_name=game.current_short_phase,
-                response_type="phase_result_diary",
+                num_movement_phases_to_show=1,
            )
-            if raw_response and raw_response.strip():
+            formatted_diary = self.format_private_diary_for_prompt()
-                # The response should be plain text diary entry
+
-                diary_entry = raw_response.strip()
+            board_state_dict = game.get_state()
-                self.add_diary_entry(diary_entry, game.current_short_phase)
+            board_state_str = self._format_board_state(board_state_dict)
-                success_status = "TRUE"
+
-                logger.info(f"[{self.power_name}] Phase result diary entry generated and added.")
+            # Get recent negotiations for this phase
-            else:
+            messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase)
-                fallback_diary = (
+            if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"):
-                    f"Phase {game.current_short_phase} completed. Orders executed as: {your_orders_str}. (Failed to generate detailed analysis)"
+                messages_this_round = (
                    "(No messages involving your power this round.)"
                )
                self.add_diary_entry(fallback_diary, game.current_short_phase)
                logger.warning(f"[{self.power_name}] Empty response from LLM. Added fallback phase result diary.")
                success_status = "FALSE"
-        except Exception as e:
+            # Format relationships
-            logger.error(f"[{self.power_name}] Error generating phase result diary: {e}", exc_info=True)
+            relationships_str = "\n".join([f"{p}: {r}" for p, r in self.relationships.items()])
-            fallback_diary = f"Phase {game.current_short_phase} completed. Unable to analyze results due to error."
+
-            self.add_diary_entry(fallback_diary, game.current_short_phase)
+            # Format goals
-            success_status = f"FALSE: {type(e).__name__}"
+            goals_str = "\n".join([f"- {g}" for g in self.goals]) if self.goals else "None"
-        finally:
+
-            log_llm_response(
+            # Create the prompt
-                log_file_path=log_file_path,
+            prompt = prompt_template.format(
                model_name=self.client.model_name,
                power_name=self.power_name,
-                phase=game.current_short_phase,
+                current_phase=phase_name,
-                response_type="phase_result_diary",
+                phase_summary=phase_summary,
-                raw_input_prompt=prompt,
+                all_orders_formatted=all_orders_formatted,
-                raw_response=raw_response,
+                your_negotiations=messages_this_round,
-                success=success_status,
+                pre_phase_relationships=relationships_str,
                agent_goals=goals_str,
                formatted_diary=formatted_diary,
                board_state=board_state_str,
            )
            logger.debug(f"[{self.power_name}] Phase result diary prompt:\n{prompt[:500]}...")
            raw_response = ""
            success_status = "FALSE"
            try:
                raw_response = await run_llm_and_log(
                    client=self.client,
                    prompt=prompt,
                    power_name=self.power_name,
                    phase=phase_name,
                    response_type="phase_result_diary",
                )
                if raw_response and raw_response.strip():
                    # The response should be plain text diary entry
                    diary_entry = raw_response.strip()
                    self.add_diary_entry(diary_entry, phase_name)
                    success_status = "TRUE"
                    logger.info(f"[{self.power_name}] Phase result diary entry generated and added.")
                else:
                    fallback_diary = (
                        f"Phase {phase_name} completed."
                    )
                    self.add_diary_entry(fallback_diary, phase_name)
                    logger.warning(f"[{self.power_name}] Empty response from LLM. Added fallback phase result diary.")
                    success_status = "FALSE"
            except Exception as e:
                logger.error(f"[{self.power_name}] Error generating phase result diary: {e}", exc_info=True)
                fallback_diary = f"Phase {phase_name} completed. Unable to analyze results due to error."
                self.add_diary_entry(fallback_diary, phase_name)
                success_status = f"FALSE: {type(e).__name__}"
            finally:
                log_llm_response(
                    log_file_path=log_file_path,
                    model_name=self.client.model_name,
                    power_name=self.power_name,
                    phase=phase_name,
                    response_type="phase_result_diary",
                    raw_input_prompt=prompt,
                    raw_response=raw_response,
                    success=success_status,
                )
        except Exception as e:
            logger.error(e)
            logger.error('!generate_phase_result_diary_entry failed')
    def log_state(self, prefix=""):
        logger.debug(f"[{self.power_name}] {prefix} State: Goals={self.goals}, Relationships={self.relationships}")
--- a/ai_diplomacy/clients.py
+++ b/ai_diplomacy/clients.py
@ -1039,7 +1039,7 @@ class OpenRouterClient(BaseModelClient):
        logger.debug(f"[{self.model_name}] Initialized OpenRouter client")
-    async def generate_response(self, prompt: str, temperature: float = 0.5, inject_random_seed: bool = True) -> str:
+    async def generate_response(self, prompt: str, temperature: float = 0.0, inject_random_seed: bool = True) -> str:
        """Generate a response using OpenRouter with robust error handling."""
        try:
            # Append the call to action to the user's prompt
--- a/ai_diplomacy/diary_logic.py
+++ b/ai_diplomacy/diary_logic.py
@ -11,49 +11,90 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 async def run_diary_consolidation(
    agent: "DiplomacyAgent",
    game: "Game",
    log_file_path: str,
-    entries_to_keep_unsummarized: int = 6,
+    years_to_keep_unsummarised: int = 1,
    prompts_dir: Optional[str] = None,
 ):
    """
    Consolidate older diary entries while keeping recent ones.
-    This is the logic moved from the DiplomacyAgent class.
+
    Parameters
    ----------
    agent : DiplomacyAgent
    game  : Game
    log_file_path : str
    years_to_keep_unsummarised : int, default 1
        Number of *distinct years* whose entries remain verbatim.
    prompts_dir : Optional[str]
    """
-    logger.info(f"[{agent.power_name}] CONSOLIDATION START — {len(agent.full_private_diary)} total full entries")
+    logger.info(
        f"[{agent.power_name}] CONSOLIDATION START — "
        f"{len(agent.full_private_diary)} total full entries"
    )
-    full_entries = [e for e in agent.full_private_diary if not e.startswith("[CONSOLIDATED HISTORY]")]
+    # Remove any earlier consolidated block first
    full_entries = [
        e for e in agent.full_private_diary
        if not e.startswith("[CONSOLIDATED HISTORY]")
    ]
-    if len(full_entries) <= entries_to_keep_unsummarized:
+    if not full_entries:
-        agent.private_diary = list(agent.full_private_diary)
+        agent.private_diary = []
-        logger.info(f"[{agent.power_name}] ≤ {entries_to_keep_unsummarized} full entries — skipping consolidation")
+        logger.warning(f"[{agent.power_name}] No diary entries found")
        return
-    boundary_entry = full_entries[-entries_to_keep_unsummarized]
+    # Extract years by scanning from newest to oldest
-    match = re.search(r"\[[SFWRAB]\s*(\d{4})", boundary_entry)
+    year_re = re.compile(r"\[[SFWRAB]\s*(\d{4})")  # matches “[S1901”, “[F1902”…”
-    if not match:
+    recent_years: list[int] = []
-        logger.error(f"[{agent.power_name}] Could not parse year from boundary entry; aborting consolidation")
+
    for entry in reversed(full_entries):            # newest last
        match = year_re.search(entry)
        if not match:
            # Lines without a year tag are considered “dateless”; keep them
            continue
        yr = int(match.group(1))
        if yr not in recent_years:
            recent_years.append(yr)
        if len(recent_years) >= years_to_keep_unsummarised:
            break
    # If every distinct year falls inside the keep-window, skip consolidation
    all_years = {
        int(m.group(1))
        for e in full_entries
        if (m := year_re.search(e))
    }
    if len(all_years - set(recent_years)) == 0:
        agent.private_diary = list(agent.full_private_diary)
        logger.info(
            f"[{agent.power_name}] ≤ {years_to_keep_unsummarised} distinct years "
            "— skipping consolidation"
        )
        return
-    cutoff_year = int(match.group(1))
+    # Partition entries
-    logger.info(f"[{agent.power_name}] Cut-off year for consolidation: {cutoff_year}")
+    keep_set = set(recent_years)
-    def _entry_year(entry: str) -> int | None:
+    def _entry_year(entry: str) -> Optional[int]:
-        m = re.search(r"\[[SFWRAB]\s*(\d{4})", entry)
+        m = year_re.search(entry)
        return int(m.group(1)) if m else None
-    entries_to_summarize = [e for e in full_entries if (_entry_year(e) is not None and _entry_year(e) < cutoff_year)]
+    entries_to_keep = [e for e in full_entries if (_entry_year(e) in keep_set)]
-    entries_to_keep = [e for e in full_entries if (_entry_year(e) is None or _entry_year(e) >= cutoff_year)]
+    entries_to_summarise = [e for e in full_entries if (_entry_year(e) not in keep_set)]
-    logger.info(f"[{agent.power_name}] Summarising {len(entries_to_summarize)} entries; keeping {len(entries_to_keep)} recent entries verbatim")
+    logger.info(
        f"[{agent.power_name}] Summarising {len(entries_to_summarise)} entries "
        f"from years < {min(keep_set)}; keeping {len(entries_to_keep)} recent entries verbatim"
    )
-    if not entries_to_summarize:
+    if not entries_to_summarise:
        agent.private_diary = list(agent.full_private_diary)
-        logger.warning(f"[{agent.power_name}] No eligible entries to summarise; context diary left unchanged")
+        logger.warning(
            f"[{agent.power_name}] No eligible entries to summarise; context diary left unchanged"
        )
        return
    prompt_template = load_prompt("diary_consolidation_prompt.txt", prompts_dir=prompts_dir)
@ -63,7 +104,7 @@ async def run_diary_consolidation(
    prompt = prompt_template.format(
        power_name=agent.power_name,
-        full_diary_text="\n\n".join(entries_to_summarize),
+        full_diary_text="\n\n".join(entries_to_summarise),
    )
    raw_response = ""
@ -71,7 +112,6 @@ async def run_diary_consolidation(
    consolidation_client = None
    try:
        consolidation_client = agent.client
        raw_response = await run_llm_and_log(
            client=consolidation_client,
            prompt=prompt,
@ -87,14 +127,21 @@ async def run_diary_consolidation(
        new_summary_entry = f"[CONSOLIDATED HISTORY] {consolidated_text}"
        agent.private_diary = [new_summary_entry] + entries_to_keep
        success_flag = "TRUE"
-        logger.info(f"[{agent.power_name}] Consolidation complete — {len(agent.private_diary)} context entries now")
+        logger.info(
            f"[{agent.power_name}] Consolidation complete — "
            f"{len(agent.private_diary)} context entries now"
        )
    except Exception as exc:
        logger.error(f"[{agent.power_name}] Diary consolidation failed: {exc}", exc_info=True)
    finally:
        log_llm_response(
            log_file_path=log_file_path,
-            model_name=(consolidation_client.model_name if consolidation_client is not None else agent.client.model_name),
+            model_name=(
                consolidation_client.model_name
                if consolidation_client is not None
                else agent.client.model_name
            ),
            power_name=agent.power_name,
            phase=game.current_short_phase,
            response_type="diary_consolidation",
--- a/ai_diplomacy/game_history.py
+++ b/ai_diplomacy/game_history.py
@ -182,7 +182,7 @@ class GameHistory:
        eng2code = {"AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA", "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR"}
        norm = game.map.norm
-        out_lines = ["**ORDER HISTORY (Recent Rounds)**"]
+        out_lines = []
        for ph in phases_to_report:
            if not (ph.orders_by_power or ph.submitted_orders_by_power):
@ -234,8 +234,14 @@ class GameHistory:
                        tag = "bounce"
                    elif "void" == tag:
                        tag = "void: no effect"
-
+                    
-                    out_lines.append(f"    {order} ({tag})")
+                    # don't show (success) tag for hold moves, it might be causing convergence on
                    # always-hold behaviour
                    is_hold = any(kw in order.upper() for kw in (" H", " HOLD"))
                    if tag == "success" and is_hold:
                        out_lines.append(f"    {order}")
                    else:
                        out_lines.append(f"    {order} ({tag})")
                    seen_ok.add(_norm_keep(order))
                # 2️⃣ invalid submissions
@ -246,6 +252,139 @@ class GameHistory:
            return "\n(No orders were issued in recent history)\n"
        return "\n".join(out_lines)
    def get_orders_history_for_phase(
        self,
        game: "Game",
        phase_name: str,
    ) -> Dict[str, Dict[str, List[Dict[str, str]]]]:
        """
        Return the orders recorded for one phase, grouped by power and order type.

        Args:
            game: Game object. ``game.map.norm`` normalises order strings;
                ``game.get_phase_history()`` (if the attribute exists) is
                consulted for outcomes when this history holds none.
            phase_name: Name of the single phase to report, matched against
                ``phase.name`` of the entries in ``self.phases``.

        Returns:
            A plain nested dict::

                {
                    "<POWER>": {
                        "<order_type>": [
                            {"order": "<order str>", "result": "<result str>"},
                            ...
                        ],
                        ...
                    },
                    ...
                }

            Order types: waive, hold, support, convoy, retreat, move, build,
            disband, other. Accepted orders are listed first with their
            outcome ("success" when no outcome is recorded); submitted
            orders that were not accepted follow with result "invalid".
            An empty dict is returned when the phase is unknown or has no
            accepted or submitted orders.
        """
        # ── locate the requested phase ──────────────────────────────
        target_phase = next((p for p in self.phases if p.name == phase_name), None)
        if not target_phase or not (target_phase.orders_by_power or target_phase.submitted_orders_by_power):
            return {}

        # ── helpers ───────────────────────────────────────────────
        def _scalar(res):
            """Flatten lists/dicts to a single lower-case outcome token."""
            tag = res
            while isinstance(tag, list):
                tag = tag[0] if tag else ""
            if isinstance(tag, dict):
                tag = tag.get("outcome") or tag.get("result") or ""
            if tag is None:
                # A missing outcome must not become the literal string "none".
                return ""
            return str(tag).strip().lower()

        def _order_type(order: str) -> str:
            o = order.upper()
            if o == "WAIVE":
                return "waive"
            # Match H/HOLD as whole tokens: a substring test for " H" also
            # fires on provinces such as HOL or HEL and mislabels moves.
            tokens = o.split()
            if "H" in tokens or "HOLD" in tokens:
                return "hold"
            if " S " in o:
                return "support"
            if " C " in o:
                return "convoy"
            if " R " in o:
                return "retreat"
            if " - " in o:
                return "move"
            if " BUILD" in o or o.endswith(" B") or " B " in o:
                return "build"
            if " DISBAND" in o or o.endswith(" D") or " D " in o:
                return "disband"
            return "other"

        # engine fallback
        engine_phases = {ph.name: ph for ph in getattr(game, "get_phase_history", lambda: [])()}
        eng2code = {
            "AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA",
            "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR",
        }
        norm = game.map.norm

        def _norm_keep(o):
            # WAIVE is kept verbatim rather than passed through the map normaliser.
            return o if o.upper() == "WAIVE" else norm(o)

        orders_by_power = defaultdict(lambda: defaultdict(list))

        # iterate powers present in this phase
        for pwr in sorted(set(target_phase.orders_by_power) | set(target_phase.submitted_orders_by_power)):
            submitted = target_phase.submitted_orders_by_power.get(pwr, [])
            accepted = target_phase.orders_by_power.get(pwr, [])

            # A lone order string is treated as a one-element list.
            if isinstance(submitted, str):
                submitted = [submitted]
            if isinstance(accepted, str):
                accepted = [accepted]

            sub_norm = {_norm_keep(o): o for o in submitted}

            # outcome source
            # NOTE(review): when this power has no results the whole
            # results_by_power mapping is used as the lookup table; the
            # engine fallback below therefore only runs when that mapping
            # is empty — confirm this is intended.
            raw_res = target_phase.results_by_power.get(pwr) or target_phase.results_by_power or {}
            if not raw_res:
                eng = engine_phases.get(target_phase.name)
                if eng and hasattr(eng, "order_results"):
                    key = next((k for k, v in eng2code.items() if v == pwr), None)
                    raw_res = (eng.order_results or {}).get(key, {})

            seen_ok = set()

            # accepted orders
            for idx, order in enumerate(accepted):
                if isinstance(raw_res, dict):
                    # Try the full order text, then its first two tokens.
                    res_raw = raw_res.get(order) or raw_res.get(" ".join(order.split()[:2]))
                elif isinstance(raw_res, list) and idx < len(raw_res):
                    res_raw = raw_res[idx]
                else:
                    res_raw = ""

                tag = _scalar(res_raw)
                if not tag or tag == "ok":
                    tag = "success"
                elif "bounce" in tag:
                    tag = "bounce"
                elif "void" == tag:
                    tag = "void: no effect"

                orders_by_power[pwr][_order_type(order)].append(
                    {"order": order, "result": tag}
                )
                seen_ok.add(_norm_keep(order))

            # invalid submissions: submitted but never accepted
            for k in sorted(set(sub_norm) - seen_ok):
                order_str = sub_norm[k]
                orders_by_power[pwr][_order_type(order_str)].append(
                    {"order": order_str, "result": "invalid"}
                )

        # convert nested defaultdicts to regular dicts
        return {pwr: dict(type_map) for pwr, type_map in orders_by_power.items()}
    def get_messages_this_round(self, power_name: str, current_phase_name: str) -> str:
        current_phase: Optional[Phase] = None
        for phase_obj in self.phases:
--- a/ai_diplomacy/game_logic.py
+++ b/ai_diplomacy/game_logic.py
@ -133,7 +133,7 @@ def save_game_state(
        if year_val is not None and year_val > run_config.max_year:
            break
-        phase_name = phase_block["name"]
+        phase_name = phase_block["name"]        
        # 3a.  Re-attach anything we cached from a previous save.
        if phase_name in previous_phase_extras:
@ -151,12 +151,15 @@ def save_game_state(
            # -------------------------------------------------------------------
            phase_block["config"] = cfg
            phase_block["state_agents"] = current_state_agents
            phase_block["order_results"] = game_history.get_orders_history_for_phase(
                game, completed_phase_name
            )
    # -------------------------------------------------------------- #
    # 4.  Attach top-level metadata and write atomically.            #
    # -------------------------------------------------------------- #
    saved_game["phase_summaries"] = getattr(game, "phase_summaries", {})
-    saved_game["final_agent_states"] = {p_name: {"relationships": a.relationships, "goals": a.goals} for p_name, a in agents.items()}
+    saved_game["final_agent_states"] = {p_name: {"relationships": a.relationships, "goals": a.goals} for p_name, a in agents.items()}    
    # Filter out phases > max_year
    # saved_game["phases"] = [
@ -210,8 +213,8 @@ def load_game_state(
    last_phase = saved_game_data["phases"][-1]
    # Wipe the data that must be regenerated **but preserve the keys**
-    last_phase["orders"] = {}  # was dict
+    last_phase["orders"] = {}
-    last_phase["results"] = {}  # was dict
+    last_phase["results"] = {}
    last_phase["messages"] = []
    game = from_saved_game_format(saved_game_data)
--- a/ai_diplomacy/initialization.py
+++ b/ai_diplomacy/initialization.py
@ -158,7 +158,7 @@ async def initialize_agent_state_ext(
        # Fallback if LLM data was not applied or parsing failed
        if not initial_goals_applied:
            if not agent.goals:  # Only set defaults if no goals were set during agent construction or by LLM
-                agent.goals = ["Survive and expand", "Form beneficial alliances", "Secure key territories"]
+                agent.goals = []
                agent.add_journal_entry(f"[{current_phase}] Set default initial goals as LLM provided none or parse failed.")
                logger.info(f"[{power_name}] Default goals set.")
@ -180,7 +180,7 @@ async def initialize_agent_state_ext(
        success_status = f"Failure: Exception ({type(e).__name__})"
        # Fallback logic for goals/relationships if not already set by earlier fallbacks
        if not agent.goals:
-            agent.goals = ["Survive and expand", "Form beneficial alliances", "Secure key territories"]
+            agent.goals = []
            logger.info(f"[{power_name}] Set fallback goals after top-level error: {agent.goals}")
        if not agent.relationships or all(r == "Neutral" for r in agent.relationships.values()):
            agent.relationships = {p: "Neutral" for p in ALL_POWERS if p != power_name}
--- a/ai_diplomacy/negotiations.py
+++ b/ai_diplomacy/negotiations.py
@ -31,6 +31,9 @@ async def conduct_negotiations(
    Conducts a round-robin conversation among all non-eliminated powers.
    Each power can send up to 'max_rounds' messages, choosing between private
    and global messages each turn. Uses asyncio for concurrent message generation.
    NEW: Prevents a power from sending a private message to the same recipient
    in two consecutive rounds if that recipient has not replied yet.
    """
    logger.info("Starting negotiation phase.")
@ -43,6 +46,11 @@ async def conduct_negotiations(
    else:
        logger.info("No eliminated powers yet.")
    # ── new tracking for consecutive private messages ───────────────
    last_sent_round: Dict[tuple[str, str], int] = {}
    awaiting_reply: Dict[tuple[str, str], bool] = {}
    # ────────────────────────────────────────────────────────────────
    # We do up to 'max_rounds' single-message turns for each power
    for round_index in range(max_rounds):
        logger.info(f"Negotiation Round {round_index + 1}/{max_rounds}")
@ -99,14 +107,13 @@ async def conduct_negotiations(
            if isinstance(result, Exception):
                logger.error(f"Error getting conversation reply for {power_name}: {result}", exc_info=result)
                # Use model_name for stats key if possible
                if model_name in model_error_stats:
                    model_error_stats[model_name]["conversation_errors"] += 1
-                else:  # Fallback to power_name if model name not tracked (shouldn't happen)
+                else:
                    model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0)
                    model_error_stats[power_name]["conversation_errors"] += 1
-                messages = []  # Treat as no messages on error
+                messages = []
-            elif result is None:  # Handle case where client might return None on internal error
+            elif result is None:
                logger.warning(f"Received None instead of messages for {power_name}.")
                messages = []
                if model_name in model_error_stats:
@ -115,48 +122,65 @@ async def conduct_negotiations(
                    model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0)
                    model_error_stats[power_name]["conversation_errors"] += 1
            else:
-                messages = result  # result is the list of message dicts
+                messages = result
                logger.debug(f"Received {len(messages)} message(s) from {power_name}.")
-            # Process the received messages (same logic as before)
+            if not messages:
            if messages:
                for message in messages:
                    # Validate message structure
                    if not isinstance(message, dict) or "content" not in message:
                        logger.warning(f"Invalid message format received from {power_name}: {message}. Skipping.")
                        continue
                    # Create an official message in the Diplomacy engine
                    # Determine recipient based on message type
                    if message.get("message_type") == "private":
                        recipient = normalize_recipient_name(message.get("recipient", GLOBAL))  # Default to GLOBAL if recipient missing somehow
                        if recipient not in game.powers and recipient != GLOBAL:
                            logger.warning(f"Invalid recipient '{recipient}' in message from {power_name}. Sending globally.")
                            recipient = GLOBAL  # Fallback to GLOBAL if recipient power is invalid
                    else:  # Assume global if not private or type is missing
                        recipient = GLOBAL
                    diplo_message = Message(
                        phase=game.current_short_phase,
                        sender=power_name,
                        recipient=recipient,  # Use determined recipient
                        message=message.get("content", ""),  # Use .get for safety
                        time_sent=None,  # Let the engine assign time
                    )
                    game.add_message(diplo_message)
                    # Also add to our custom history
                    game_history.add_message(
                        game.current_short_phase,
                        power_name,
                        recipient,  # Use determined recipient here too
                        message.get("content", ""),  # Use .get for safety
                    )
                    journal_recipient = f"to {recipient}" if recipient != GLOBAL else "globally"
                    agent.add_journal_entry(f"Sent message {journal_recipient} in {game.current_short_phase}: {message.get('content', '')[:100]}...")
                    logger.info(f"[{power_name} -> {recipient}] {message.get('content', '')[:100]}...")
            else:
                logger.debug(f"No valid messages returned or error occurred for {power_name}.")
-                # Error stats handled above based on result type
+                continue
            for message in messages:
                if not isinstance(message, dict) or "content" not in message:
                    logger.warning(f"Invalid message format received from {power_name}: {message}. Skipping.")
                    continue
                # Determine recipient
                if message.get("message_type") == "private":
                    recipient = normalize_recipient_name(message.get("recipient", GLOBAL))
                    if recipient not in game.powers and recipient != GLOBAL:
                        logger.warning(f"Invalid recipient '{recipient}' in message from {power_name}. Sending globally.")
                        recipient = GLOBAL
                else:
                    recipient = GLOBAL
                # ── repetition guard for private messages ─────────────
                if recipient != GLOBAL:
                    pair = (power_name, recipient)
                    if awaiting_reply.get(pair, False) and last_sent_round.get(pair) == round_index - 1:
                        logger.info(
                            f"Discarding repeat private message from {power_name} to {recipient} "
                            f"(waiting for reply since last round)."
                        )
                        continue  # skip this message
                    # record outbound and set waiting flag
                    last_sent_round[pair] = round_index
                    awaiting_reply[pair] = True
                    # recipient has now been contacted; when they respond, we'll clear the flag for the reverse pair
                    awaiting_reply[(recipient, power_name)] = False
                # ─────────────────────────────────────────────────────
                diplo_message = Message(
                    phase=game.current_short_phase,
                    sender=power_name,
                    recipient=recipient,
                    message=message.get("content", ""),
                    time_sent=None,
                )
                game.add_message(diplo_message)
                game_history.add_message(
                    game.current_short_phase,
                    power_name,
                    recipient,
                    message.get("content", ""),
                )
                journal_recipient = f"to {recipient}" if recipient != GLOBAL else "globally"
                agent.add_journal_entry(
                    f"Sent message {journal_recipient} in {game.current_short_phase}: "
                    f"{message.get('content', '')[:100]}..."
                )
                logger.info(f"[{power_name} -> {recipient}] {message.get('content', '')[:100]}...")
    logger.info("Negotiation phase complete.")
    return game_history
--- a/ai_diplomacy/prompt_constructor.py
+++ b/ai_diplomacy/prompt_constructor.py
@ -214,14 +214,10 @@ def construct_order_generation_prompt(
        include_messages=not _use_simple,  # include only when *not* simple
    )
-    # Append goals at the end for focus
+    # delete unused section from context:
-    goals_section = ""
+    context = context.replace('Messages This Round\n\n\nEnd Messages', '')
    if agent_goals:
        goals_section = (
            "\n\nYOUR STRATEGIC GOALS:\n" + "\n".join(f"- {g}" for g in agent_goals) + "\n\nKeep these goals in mind when choosing your orders."
        )
-    final_prompt = system_prompt + "\n\n" + context + "\n\n" + instructions + goals_section
+    final_prompt = system_prompt + "\n\n" + context + "\n\n" + instructions
    # Make the power names more LLM friendly
    final_prompt = (
--- a/ai_diplomacy/prompts/phase_result_diary_prompt.txt
+++ b/ai_diplomacy/prompts/phase_result_diary_prompt.txt
@ -17,9 +17,6 @@ YOUR RELATIONSHIPS BEFORE THIS PHASE
 YOUR GOALS
 {agent_goals}
 YOUR ACTUAL ORDERS
 {your_actual_orders}
 TASK
 Analyze what actually happened this phase compared to negotiations and expectations.
--- a/ai_diplomacy/prompts_simple/context_prompt.txt
+++ b/ai_diplomacy/prompts_simple/context_prompt.txt
@ -8,11 +8,11 @@ Phase: {current_phase}
 Note: You can only build units in your home centers if they are empty. If you lose control of a home center, you cannot build units there, so holding them is critical.
 # Player Status
-Current Goals: {agent_goals}
+Current Goals:
-Relationships: {agent_relationships}
+{agent_goals}
-# Recent Private Diary Entries (Your inner thoughts and plans):
+# Relationships:
-{agent_private_diary}
+{agent_relationships}
 # Order History
 {order_history}
@ -28,6 +28,9 @@ Possible Orders For {current_phase}
 {possible_orders}
 End Possible Orders
 # Recent Private Diary Entries (Your inner thoughts and plans):
 {agent_private_diary}
 Messages This Round
 {messages_this_round}
 End Messages
--- a/ai_diplomacy/prompts_simple/diary_consolidation_prompt.txt
+++ b/ai_diplomacy/prompts_simple/diary_consolidation_prompt.txt
@ -4,24 +4,15 @@ Your Power: {power_name}
 GAME CONTEXT
 You are playing Diplomacy, a strategic board game set in pre-WWI Europe. Seven powers compete for control by conquering supply centers. Victory requires 18 supply centers.
 Key game mechanics:
 - Spring (S) and Fall (F) movement phases where armies/fleets move
 - Fall phases include builds/disbands based on supply center control
 - Units can support, convoy, or attack
 - All orders resolve simultaneously
 - Success often requires negotiated coordination with other powers
 FULL DIARY HISTORY
 {full_diary_text}
 TASK
-Create a comprehensive consolidated summary of the most important parts of this diary history. It will serve as your long-term memory.
+Create a concise consolidated summary of the most important parts of this diary history. It will serve as your long-term memory. Do not include anything that is not strategically or diplomatically useful going forward. Aim for 300 words.
 Prioritize the following:
-1.  **Recent Events, Goals & Intentions**
+1.  **Key Historical Diplomatic Events:** Prioritise both *strategically impactful* and *recent* events.
-2.  **Long-Term Strategy:** Enduring goals, rivalries, and alliances that are still relevant.
+2.  **Information that has ongoing importance & usefulness**
 3.  **Key Historical Events:** Major betrayals, decisive battles, and significant turning points that shape the current diplomatic landscape.
 4.  **Important Notes:** Any notes you deem important from the history not already included.
 RESPONSE FORMAT
 Return ONLY the consolidated summary text. Do not include JSON, formatting markers, or meta-commentary.
--- a/ai_diplomacy/prompts_simple/negotiation_diary_prompt.txt
+++ b/ai_diplomacy/prompts_simple/negotiation_diary_prompt.txt
@ -2,35 +2,45 @@ NEGOTIATION SUMMARY REQUEST
 Power: {power_name}
 Phase: {current_phase}
-MESSAGES THIS ROUND
+Goals (may need updating):
 {messages_this_round}
 CURRENT STATUS
 Goals:
 {agent_goals}
-Relationships:
+Relationships (may need updating):
 {agent_relationships}
 Game State:
 {board_state_str}
 Private Diary:
 {private_diary_summary}
 Messages This Round:
 {messages_this_round}
 TASK
 Analyze the negotiations, goals, relationships, and game state to:
-1. Summarize key outcomes and agreements
+1. Summarize key outcomes and agreements concisely
-2. State your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them.
+2. Concisely state your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them.
 3. Update relationships as needed (Enemy, Unfriendly, Neutral, Friendly, Ally)
-4. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary.
+4. Include your latest overarching goals (including any updates)
 5. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary.
 RESPONSE FORMAT
 Return ONLY a JSON object with this structure:
-{
+{{
-"negotiation_summary": "Key outcomes from negotiations",
+  "negotiation_summary": "Key outcomes from negotiations",
-"intent": "Specific intent for upcoming orders",
+  "intent": "Specific intent for upcoming orders this phase",
-"updated_relationships": {
+  "updated_relationships": {{
-"POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally"
+    "POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally"
-}
+  }},
-}
+  "goals": [
    "goal 1",
    "goal 2",
    ...
  ]
 }}
 Reminder: If you need to quote something, only use single quotes in the actual messages so as not to interfere with the JSON structure.
--- a/ai_diplomacy/prompts_simple/phase_result_diary_prompt.txt
+++ b/ai_diplomacy/prompts_simple/phase_result_diary_prompt.txt
@ -1,7 +1,13 @@
 PHASE RESULT ANALYSIS
-Power: {power_name}
+Your Power: {power_name}
 Phase: {current_phase}
 RECENT DIARY ENTRIES
 {formatted_diary}
 BOARD STATE
 {board_state}
 PHASE SUMMARY
 {phase_summary}
@ -17,9 +23,6 @@ YOUR RELATIONSHIPS BEFORE THIS PHASE
 YOUR GOALS
 {agent_goals}
 YOUR ACTUAL ORDERS
 {your_actual_orders}
 TASK
 Analyze what actually happened this phase compared to negotiations and expectations.
@ -29,12 +32,12 @@ Consider:
 3. SURPRISES: What unexpected moves occurred?
 4. IMPACT: How did these events affect your strategic position?
-Write a reflective diary entry (150-250 words) that:
+Write a concise diary entry (100-150 words) of the most important things you would like to remember, e.g.:
- Identifies key betrayals or successful collaborations
+- Key betrayals or successful collaborations
- Assesses impact on your position
+- Assess impact on your position
- Updates your understanding of other powers' trustworthiness
+- Update your understanding of other powers' trustworthiness
- Notes strategic lessons learned
+- Strategic lessons learned
- Adjusts your perception of threats and opportunities
+- Moves that failed, and ideas on how to avoid the error in the future
 Focus on concrete events and their implications for your future strategy.
--- a/analysis/statistical_game_analysis.py
+++ b/analysis/statistical_game_analysis.py
@ -69,6 +69,12 @@ class StatisticalGameAnalyzer:
        'order_generation', 'order_diary', 'state_update_parsing_empty_or_invalid_data',
        'diary_consolidation', 'state_update_partial_data', 'state_update_no_response'
    ]
    # Order-type categories tracked by the order-result statistics.
    # Each entry yields a family of ``orders_<plural>_<metric>`` columns
    # (the plural is the name plus "s" unless it already ends in "s").
    # Order types found in the data that are not listed here are counted
    # under "other" by the order-result extractors.
    ORDER_TYPES = [
        "move", "hold", "support", "convoy",
        "build", "disband", "waive", "other",
        "retreat"
    ]
    def __init__(self):
        """Initialize analyzer with configuration constants."""
@ -234,6 +240,103 @@ class StatisticalGameAnalyzer:
        return responses
    def _extract_order_results_features(self, power: str, phase_data: dict) -> dict:
        """
        Count orders and outcomes for a single power in one phase and add
        a success-rate (0-1) for every order type.
        """
        features: dict[str, float | int] = {}
        for ot in self.ORDER_TYPES:
            plural = f"{ot}s" if not ot.endswith("s") else ot
            for metric in ("total", "success", "bounce", "void", "invalid"):
                features[f"orders_{plural}_{metric}"] = 0
            features[f"orders_{plural}_success_rate"] = 0.0      # ← new
        orders_by_type = phase_data.get("order_results", {}).get(power, {})
        if not orders_by_type:
            return features
        for otype, order_list in orders_by_type.items():
            otype = otype.lower()
            if otype not in self.ORDER_TYPES:
                otype = "other"
            plural = f"{otype}s" if not otype.endswith("s") else otype
            for entry in order_list:
                result = str(entry.get("result", "")).lower().strip()
                key_base = f"orders_{plural}"
                features[f"{key_base}_total"] += 1
                match result:
                    case "success":
                        features[f"{key_base}_success"] += 1
                    case "bounce":
                        features[f"{key_base}_bounce"] += 1
                    case "invalid":
                        features[f"{key_base}_invalid"] += 1
                    case _ if result in ("void", "void: no effect", ""):
                        features[f"{key_base}_void"] += 1
        # ── derive success rates ──
        for ot in self.ORDER_TYPES:
            plural = f"{ot}s" if not ot.endswith("s") else ot
            succ = features[f"orders_{plural}_success"]
            tot  = features[f"orders_{plural}_total"]
            features[f"orders_{plural}_success_rate"] = succ / tot if tot else 0.0
        return features
    # ────────────────── GAME-LEVEL ORDER TOTALS ──────────────────
    def _aggregate_order_results(self, power: str, game_data: dict) -> dict:
        """
        Sum every order-type/result pair over *all* phases for one power
        and add success-rate (0-1) columns.
        """
        totals: dict[str, float | int] = {}
        for ot in self.ORDER_TYPES:
            plural = f"{ot}s" if not ot.endswith("s") else ot
            for metric in ("total", "success", "bounce", "void", "invalid"):
                totals[f"orders_{plural}_{metric}"] = 0
            totals[f"orders_{plural}_success_rate"] = 0.0          # ← new
        for phase in game_data.get("phases", []):
            orders_by_type = phase.get("order_results", {}).get(power, {})
            if not orders_by_type:
                continue
            for otype, order_list in orders_by_type.items():
                otype = otype.lower()
                if otype not in self.ORDER_TYPES:
                    otype = "other"
                plural = f"{otype}s" if not otype.endswith("s") else otype
                for entry in order_list:
                    result = str(entry.get("result", "")).lower().strip()
                    key_base = f"orders_{plural}"
                    totals[f"{key_base}_total"] += 1
                    match result:
                        case "success":
                            totals[f"{key_base}_success"] += 1
                        case "bounce":
                            totals[f"{key_base}_bounce"] += 1
                        case "invalid":
                            totals[f"{key_base}_invalid"] += 1
                        case _ if result in ("void", "void: no effect", ""):
                            totals[f"{key_base}_void"] += 1
        # ── derive success rates ──
        for ot in self.ORDER_TYPES:
            plural = f"{ot}s" if not ot.endswith("s") else ot
            succ = totals[f"orders_{plural}_success"]
            tot  = totals[f"orders_{plural}_total"]
            totals[f"orders_{plural}_success_rate"] = succ / tot if tot else 0.0
        return totals
    def _extract_phase_features(self, llm_responses: List[dict], game_data: dict) -> List[dict]:
        """Extract phase-level features for all powers, phases, and response types."""
        phase_features = []
@ -294,6 +397,10 @@ class StatisticalGameAnalyzer:
        # === FAILURE ANALYSIS (HARD MODE) ===
        failure_metrics = self._analyze_failures(power, phase, response_type, llm_responses)
        features.update(failure_metrics)
        # === ORDER-RESULT METRICS ===
        order_result_features = self._extract_order_results_features(power, phase_data)
        features.update(order_result_features)
        # Add response-type specific features
@ -794,7 +901,10 @@ class StatisticalGameAnalyzer:
        if total_calls > 0:
            features['overall_failure_rate_percentage'] = (total_failures / total_calls) * 100.0
            features['overall_success_rate_percentage'] = (total_successes / total_calls) * 100.0
-        
+
        # === ORDER TOTALS (whole game) ===
        order_totals = self._aggregate_order_results(power, game_data)
        features.update(order_totals)
    # Helper methods
@ -1067,6 +1177,15 @@ class StatisticalGameAnalyzer:
            'military_units_gained_vs_prev_phase',
            'relationships'
        ]
        # ensure order columns
        for ot in self.ORDER_TYPES:
            plural = f"{ot}s" if not ot.endswith("s") else ot
            for suffix in ("total", "success", "bounce", "void", "invalid", "success_rate"):
                col = f"orders_{plural}_{suffix}"
                if col not in fieldnames:
                    fieldnames.append(col)
        # Ensure all actual fields are included (in case we missed any)
        actual_fields = set()
@ -1140,6 +1259,17 @@ class StatisticalGameAnalyzer:
            # === Diplobench style single scalar game score ===
            'game_score',
        ]
        # ensure order-total columns
        for ot in self.ORDER_TYPES:
            plural = f"{ot}s" if not ot.endswith("s") else ot
            base = f"orders_{plural}_total"
            for suffix in ("total", "success", "bounce", "void", "invalid", "success_rate"):
                col = f"orders_{plural}_{suffix}"
                if col not in fieldnames:
                    fieldnames.append(col)
        # Ensure all actual fields are included
        actual_fields = set()
--- a/experiment_runner/analysis/statistical_game_analysis.py
+++ b/experiment_runner/analysis/statistical_game_analysis.py
@ -227,6 +227,8 @@ def _plot_relationships_per_game(
        # ── NEW: discard rows with no relationship info ────────────
        game_df = game_df[game_df["rel_dict"].apply(bool)]
        # ── keep only MOVE phases; drop retreat (R) and adjustment (A) ─────
        game_df = game_df[game_df["game_phase"].str.upper().str.endswith("M")]
        if game_df.empty:               # nothing left to plot
            continue
@ -334,9 +336,18 @@ def _plot_relationships_per_game(
                        else to_rgba(base_colour, alpha=0.35)
                    )
                    # ── “double” a lone point so it shows up as a short flat line ──
                    finite_pts = [(x, y) for x, y in zip(data["x"], y_off) if not math.isnan(y)]
                    if len(finite_pts) == 1:
                        x0, y0 = finite_pts[0]
                        xs = [x0 - 0.05, x0 + 0.05]   # tiny horizontal spread
                        ys = [y0, y0]
                    else:
                        xs, ys = data["x"], y_off
                    plt.plot(
-                        data["x"],
+                        xs,
-                        y_off,
+                        ys,
                        label=f"{other} ({kind})",
                        color=colour,
                        linewidth=2,
--- a/experiment_runner/analysis/summary.py
+++ b/experiment_runner/analysis/summary.py
@ -169,6 +169,7 @@ def run(exp_dir: Path, ctx: dict):  # pylint: disable=unused-argument
        sns.set_style("whitegrid")
        plt.figure(figsize=(10, 7))
        sns.boxplot(x="Power", y="SupplyCenters", data=df, palette="pastel")
        plt.ylim(0, 18)
        plt.title("Supply-center distribution")
        plt.savefig(analysis_dir / "results_summary.png", dpi=150)
        plt.close()
--- a/lm_game.py
+++ b/lm_game.py
@ -334,6 +334,17 @@ async def main():
            if neg_diary_tasks:
                await asyncio.gather(*neg_diary_tasks, return_exceptions=True)
        # Diary Consolidation
        if current_short_phase.startswith("S") and current_short_phase.endswith("M"):
            consolidation_tasks = [
                run_diary_consolidation(agent, game, llm_log_file_path,
                                        prompts_dir=agent.prompts_dir)
                for agent in agents.values()
                if not game.powers[agent.power_name].is_eliminated()
            ]
            if consolidation_tasks:
                await asyncio.gather(*consolidation_tasks, return_exceptions=True)
        # --- 4c. Order Generation ---
        logger.info("Getting orders from agents...")
        board_state = game.get_state()
@ -350,7 +361,7 @@ async def main():
                        game, agent.client, board_state, power_name, possible_orders,
                        game_history, model_error_stats,
                        agent_goals=agent.goals, agent_relationships=agent.relationships,
-                        agent_private_diary_str=agent.format_private_diary_for_prompt(),
+                        agent_private_diary_str=agent.get_latest_phase_diary_entries(), # only include latest phase in orders prompt
                        log_file_path=llm_log_file_path, phase=current_phase,
                    )
                )
@ -378,10 +389,11 @@ async def main():
            submitted_orders_this_phase[p_name] = valid + invalid
            # diary entry only for the orders we tried to submit
-            if valid or invalid:
+            if False: # disabled for now
-                await agents[p_name].generate_order_diary_entry(
+                if valid or invalid:
-                    game, valid + invalid, llm_log_file_path
+                    await agents[p_name].generate_order_diary_entry(
-                )
+                        game, valid + invalid, llm_log_file_path
                    )
        # --- 4d. Process Phase ---
        completed_phase = current_phase
@ -414,26 +426,18 @@ async def main():
        all_orders_this_phase = game.order_history.get(current_short_phase, {})
        # Phase Result Diary Entries
-        phase_result_diary_tasks = [
+        if current_short_phase.endswith("M"):
-            agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path)
+            phase_result_diary_tasks = [
-            for agent in agents.values() if not game.powers[agent.power_name].is_eliminated()
+                agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path, current_short_phase)
-        ]
+                for agent in agents.values() if not game.powers[agent.power_name].is_eliminated()
        if phase_result_diary_tasks:
            await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True)
        # Diary Consolidation
        if current_short_phase.startswith("S") and current_short_phase.endswith("M"):
            consolidation_tasks = [
                run_diary_consolidation(agent, game, llm_log_file_path,
                                        prompts_dir=agent.prompts_dir)
                for agent in agents.values()
                if not game.powers[agent.power_name].is_eliminated()
            ]
-            if consolidation_tasks:
+            if phase_result_diary_tasks:
-                await asyncio.gather(*consolidation_tasks, return_exceptions=True)
+                await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True)
        # Agent State Updates
-        if current_short_phase.endswith("M"):
+        if current_short_phase.endswith("M") and run_config.num_negotiation_rounds == 0: # r'ships are updated in negotiation round. otherwise in no press, updated in a separate step.
            current_board_state = game.get_state()
            state_update_tasks = [
                agent.analyze_phase_and_update_state(game, current_board_state, phase_summary, game_history, llm_log_file_path)