state update fixes & streamline prompts

2026-04-19 12:58:09 +00:00 · 2025-07-12 10:17:17 +10:00 · 2025-07-12 10:17:17 +10:00 · b4a56126ec
commit b4a56126ec
parent 1f154a7073
17 changed files with 710 additions and 281 deletions
--- a/ai_diplomacy/agent.py
+++ b/ai_diplomacy/agent.py
@ -97,6 +97,26 @@ class DiplomacyAgent:
        logger.info(f"Initialized DiplomacyAgent for {self.power_name} with goals: {self.goals}")
        self.add_journal_entry(f"Agent initialized. Initial Goals: {self.goals}")

+    def _format_board_state(self, board_state_dict):
+        units = board_state_dict.get('units', {})
+        centers = board_state_dict.get('centers', {})
+
+        eliminated = {power for power, scs in centers.items() if not scs}
+
+        parts = ["Units:"]
+        for power, unit_list in sorted(units.items()):
+            label = f"{power} (Eliminated)" if power in eliminated else power
+            parts.append(f"  {label}: {', '.join(unit_list)}")
+
+        parts.append("Centers:")
+        for power, center_list in sorted(centers.items()):
+            label = f"{power} (Eliminated)" if power in eliminated else power
+            parts.append(f"  {label}: {', '.join(center_list)}")
+
+        return "\n".join(parts)
+
+
+
    def _extract_json_from_text(self, text: str) -> dict:
        """Extract and parse JSON from text, handling common LLM response formats."""
        if not text or not text.strip():
@ -368,6 +388,46 @@ class DiplomacyAgent:
            f"[{self.power_name}] DIARY ENTRY ADDED for {phase}. Total full entries: {len(self.full_private_diary)}. New entry: {entry[:100]}..."
        )

+    def get_latest_phase_diary_entries(
+        self,
+        *,
+        use_private_diary: bool = False,
+        separator: str = "\n\n",
+    ) -> str:
+        """
+        Return all diary entries for the most-recent phase.
+
+        Args:
+            use_private_diary: If True look at self.private_diary, otherwise
+                               self.full_private_diary (default).
+            separator: String to place between entries in the final output.
+
+        Returns:
+            A single formatted string containing every entry from the
+            latest phase, or an empty string if no diary content exists.
+        """
+        diary: List[str] = self.private_diary if use_private_diary else self.full_private_diary
+        if not diary:
+            return ""
+
+        # Expect entries like "[S1901M] text…"
+        phase_match = re.match(r"\[([^\]]+)\]", diary[-1])
+        if not phase_match:
+            # Last line didn’t start with a phase tag; just return it.
+            return diary[-1]
+
+        latest_phase = phase_match.group(1)
+        recent_entries: List[str] = []
+
+        for entry in reversed(diary):
+            if entry.startswith(f"[{latest_phase}]"):
+                recent_entries.append(entry)
+            else:
+                break
+
+        recent_entries.reverse()  # restore chronological order
+        return separator.join(recent_entries)
+
    def format_private_diary_for_prompt(self) -> str:
        """
        Formats the context diary for inclusion in a prompt.
@ -437,12 +497,12 @@ class DiplomacyAgent:

            # Prepare context for the prompt
            board_state_dict = game.get_state()
-            board_state_str = f"Units: {board_state_dict.get('units', {})}, Centers: {board_state_dict.get('centers', {})}"
+            board_state_str = self._format_board_state(board_state_dict)

            messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase)
            if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"):
                messages_this_round = (
-                    "(No messages involving your power this round that require deep reflection for diary. Focus on overall situation.)"
+                    "(No messages involving your power this round.)"
                )

            current_relationships_str = json.dumps(self.relationships)
@ -463,31 +523,34 @@ class DiplomacyAgent:

            # Do aggressive preprocessing of the template to fix the problematic patterns
            # This includes removing any newlines or whitespace before JSON keys that cause issues
-            for pattern in ["negotiation_summary", "updated_relationships", "relationship_updates", "intent"]:
-                # Fix the "\n  "key"" pattern that breaks .format()
-                prompt_template_content = re.sub(rf'\n\s*"{pattern}"', f'"{pattern}"', prompt_template_content)
+            if False:
+                for pattern in ["negotiation_summary", "updated_relationships", "relationship_updates", "intent"]:
+                    # Fix the "\n  "key"" pattern that breaks .format()
+                    prompt_template_content = re.sub(rf'\n\s*"{pattern}"', f'"{pattern}"', prompt_template_content)

-            # Escape all curly braces in JSON examples to prevent format() from interpreting them
-            # First, temporarily replace the actual template variables
-            temp_vars = [
-                "power_name",
-                "current_phase",
-                "messages_this_round",
-                "agent_goals",
-                "agent_relationships",
-                "board_state_str",
-                "ignored_messages_context",
-            ]
-            for var in temp_vars:
-                prompt_template_content = prompt_template_content.replace(f"{{{var}}}", f"<<{var}>>")
+                # Escape all curly braces in JSON examples to prevent format() from interpreting them
+                # First, temporarily replace the actual template variables
+            
+                temp_vars = [
+                    "power_name",
+                    "current_phase",
+                    "messages_this_round",
+                    "agent_goals",
+                    "agent_relationships",
+                    "board_state_str",
+                    "ignored_messages_context",
+                    "private_diary_summary",
+                ]
+                for var in temp_vars:
+                    prompt_template_content = prompt_template_content.replace(f"{{{var}}}", f"<<{var}>>")

-            # Now escape all remaining braces (which should be JSON)
-            prompt_template_content = prompt_template_content.replace("{", "{{")
-            prompt_template_content = prompt_template_content.replace("}", "}}")
+                # Now escape all remaining braces (which should be JSON)
+                prompt_template_content = prompt_template_content.replace("{", "{{")
+                prompt_template_content = prompt_template_content.replace("}", "}}")

-            # Restore the template variables
-            for var in temp_vars:
-                prompt_template_content = prompt_template_content.replace(f"<<{var}>>", f"{{{var}}}")
+                # Restore the template variables
+                for var in temp_vars:
+                    prompt_template_content = prompt_template_content.replace(f"<<{var}>>", f"{{{var}}}")

            # Create a dictionary with safe values for formatting
            format_vars = {
@ -515,8 +578,6 @@ class DiplomacyAgent:

            logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...")

-            logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...")
-
            raw_response = await run_llm_and_log(
                client=self.client,
                prompt=full_prompt,
@ -567,7 +628,6 @@ class DiplomacyAgent:
                        diary_text_candidate = parsed_data["intent"]
                    else:
                        diary_text_candidate += "\nIntent: " + parsed_data["intent"]
-
                if diary_text_candidate:
                    diary_entry_text = diary_text_candidate
                else:
@ -610,6 +670,10 @@ class DiplomacyAgent:
                elif new_relationships is not None:  # It was provided but not a dict
                    logger.warning(f"[{self.power_name}] 'updated_relationships' from diary LLM was not a dictionary: {type(new_relationships)}")

+                # update goals
+                if "goals" in parsed_data:
+                    self.update_goals(parsed_data["goals"])
+
            # Add the generated (or fallback) diary entry
            self.add_diary_entry(diary_entry_text, game.current_short_phase)
            if relationships_updated:
@ -627,16 +691,19 @@ class DiplomacyAgent:
            self.add_diary_entry(f"(Error generating diary entry: {type(e).__name__})", game.current_short_phase)
        finally:
            if log_file_path:  # Ensure log_file_path is provided
-                log_llm_response(
-                    log_file_path=log_file_path,
-                    model_name=self.client.model_name if self.client else "UnknownModel",
-                    power_name=self.power_name,
-                    phase=game.current_short_phase if game else "UnknownPhase",
-                    response_type="negotiation_diary",  # Specific type for CSV logging
-                    raw_input_prompt=full_prompt,
-                    raw_response=raw_response,
-                    success=success_status,
-                )
+                try:
+                    log_llm_response(
+                        log_file_path=log_file_path,
+                        model_name=self.client.model_name if self.client else "UnknownModel",
+                        power_name=self.power_name,
+                        phase=game.current_short_phase if game else "UnknownPhase",
+                        response_type="negotiation_diary",  # Specific type for CSV logging
+                        raw_input_prompt=full_prompt,
+                        raw_response=raw_response,
+                        success=success_status,
+                    )
+                except Exception as e:
+                    print(e)

    async def generate_order_diary_entry(self, game: "Game", orders: List[str], log_file_path: str):
        """
@ -783,105 +850,108 @@ class DiplomacyAgent:
        # Rest of the code remains the same

    async def generate_phase_result_diary_entry(
-        self, game: "Game", game_history: "GameHistory", phase_summary: str, all_orders: Dict[str, List[str]], log_file_path: str
+        self, game: "Game", game_history: "GameHistory", phase_summary: str, all_orders: Dict[str, List[str]], log_file_path: str, phase_name: str
    ):
-        """
-        Generates a diary entry analyzing the actual phase results,
-        comparing them to negotiations and identifying betrayals/collaborations.
-        """
-        logger.info(f"[{self.power_name}] Generating phase result diary entry for {game.current_short_phase}...")
-
-        # Load the template
-        prompt_template = load_prompt("phase_result_diary_prompt.txt", prompts_dir=self.prompts_dir)
-        if not prompt_template:
-            logger.error(f"[{self.power_name}] Could not load phase_result_diary_prompt.txt. Skipping diary entry.")
-            return
-
-        # Format all orders for the prompt
-        all_orders_formatted = ""
-        for power, orders in all_orders.items():
-            orders_str = ", ".join(orders) if orders else "No orders"
-            all_orders_formatted += f"{power}: {orders_str}\n"
-
-        # Get your own orders
-        your_orders = all_orders.get(self.power_name, [])
-        your_orders_str = ", ".join(your_orders) if your_orders else "No orders"
-
-        # Get recent negotiations for this phase
-        messages_this_phase = game_history.get_messages_by_phase(game.current_short_phase)
-        your_negotiations = ""
-        for msg in messages_this_phase:
-            if msg.sender == self.power_name:
-                your_negotiations += f"To {msg.recipient}: {msg.content}\n"
-            elif msg.recipient == self.power_name:
-                your_negotiations += f"From {msg.sender}: {msg.content}\n"
-
-        if not your_negotiations:
-            your_negotiations = "No negotiations this phase"
-
-        # Format relationships
-        relationships_str = "\n".join([f"{p}: {r}" for p, r in self.relationships.items()])
-
-        # Format goals
-        goals_str = "\n".join([f"- {g}" for g in self.goals]) if self.goals else "None"
-
-        # Create the prompt
-        prompt = prompt_template.format(
-            power_name=self.power_name,
-            current_phase=game.current_short_phase,
-            phase_summary=phase_summary,
-            all_orders_formatted=all_orders_formatted,
-            your_negotiations=your_negotiations,
-            pre_phase_relationships=relationships_str,
-            agent_goals=goals_str,
-            your_actual_orders=your_orders_str,
-        )
-
-        logger.debug(f"[{self.power_name}] Phase result diary prompt:\n{prompt[:500]}...")
-
-        raw_response = ""
-        success_status = "FALSE"
-
        try:
-            raw_response = await run_llm_and_log(
-                client=self.client,
-                prompt=prompt,
+            """
+            Generates a diary entry analyzing the actual phase results,
+            comparing them to negotiations and identifying betrayals/collaborations.
+            """
+            logger.info(f"[{self.power_name}] Generating phase result diary entry for {game.current_short_phase}...")
+
+            # Load the template
+            prompt_template = load_prompt("phase_result_diary_prompt.txt", prompts_dir=self.prompts_dir)
+            if not prompt_template:
+                logger.error(f"[{self.power_name}] Could not load phase_result_diary_prompt.txt. Skipping diary entry.")
+                return
+
+            # Format all orders for the prompt
+            all_orders_formatted = game_history.get_order_history_for_prompt(
+                game=game,  # Pass the game object for normalization
                power_name=self.power_name,
-                phase=game.current_short_phase,
-                response_type="phase_result_diary",
+                current_phase_name=game.current_short_phase,
+                num_movement_phases_to_show=1,
            )

-            if raw_response and raw_response.strip():
-                # The response should be plain text diary entry
-                diary_entry = raw_response.strip()
-                self.add_diary_entry(diary_entry, game.current_short_phase)
-                success_status = "TRUE"
-                logger.info(f"[{self.power_name}] Phase result diary entry generated and added.")
-            else:
-                fallback_diary = (
-                    f"Phase {game.current_short_phase} completed. Orders executed as: {your_orders_str}. (Failed to generate detailed analysis)"
+            formatted_diary = self.format_private_diary_for_prompt()
+
+            board_state_dict = game.get_state()
+            board_state_str = self._format_board_state(board_state_dict)
+
+            # Get recent negotiations for this phase
+            messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase)
+            if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"):
+                messages_this_round = (
+                    "(No messages involving your power this round.)"
                )
-                self.add_diary_entry(fallback_diary, game.current_short_phase)
-                logger.warning(f"[{self.power_name}] Empty response from LLM. Added fallback phase result diary.")
-                success_status = "FALSE"

-        except Exception as e:
-            logger.error(f"[{self.power_name}] Error generating phase result diary: {e}", exc_info=True)
-            fallback_diary = f"Phase {game.current_short_phase} completed. Unable to analyze results due to error."
-            self.add_diary_entry(fallback_diary, game.current_short_phase)
-            success_status = f"FALSE: {type(e).__name__}"
-        finally:
-            log_llm_response(
-                log_file_path=log_file_path,
-                model_name=self.client.model_name,
+            # Format relationships
+            relationships_str = "\n".join([f"{p}: {r}" for p, r in self.relationships.items()])
+
+            # Format goals
+            goals_str = "\n".join([f"- {g}" for g in self.goals]) if self.goals else "None"
+
+            # Create the prompt
+            prompt = prompt_template.format(
                power_name=self.power_name,
-                phase=game.current_short_phase,
-                response_type="phase_result_diary",
-                raw_input_prompt=prompt,
-                raw_response=raw_response,
-                success=success_status,
+                current_phase=phase_name,
+                phase_summary=phase_summary,
+                all_orders_formatted=all_orders_formatted,
+                your_negotiations=messages_this_round,
+                pre_phase_relationships=relationships_str,
+                agent_goals=goals_str,
+                formatted_diary=formatted_diary,
+                board_state=board_state_str,
            )

+            logger.debug(f"[{self.power_name}] Phase result diary prompt:\n{prompt[:500]}...")
+
+            raw_response = ""
+            success_status = "FALSE"
+
+            try:
+                raw_response = await run_llm_and_log(
+                    client=self.client,
+                    prompt=prompt,
+                    power_name=self.power_name,
+                    phase=phase_name,
+                    response_type="phase_result_diary",
+                )
+
+                if raw_response and raw_response.strip():
+                    # The response should be plain text diary entry
+                    diary_entry = raw_response.strip()
+                    self.add_diary_entry(diary_entry, phase_name)
+                    success_status = "TRUE"
+                    logger.info(f"[{self.power_name}] Phase result diary entry generated and added.")
+                else:
+                    fallback_diary = (
+                        f"Phase {phase_name} completed."
+                    )
+                    self.add_diary_entry(fallback_diary, phase_name)
+                    logger.warning(f"[{self.power_name}] Empty response from LLM. Added fallback phase result diary.")
+                    success_status = "FALSE"
+
+            except Exception as e:
+                logger.error(f"[{self.power_name}] Error generating phase result diary: {e}", exc_info=True)
+                fallback_diary = f"Phase {phase_name} completed. Unable to analyze results due to error."
+                self.add_diary_entry(fallback_diary, phase_name)
+                success_status = f"FALSE: {type(e).__name__}"
+            finally:
+                log_llm_response(
+                    log_file_path=log_file_path,
+                    model_name=self.client.model_name,
+                    power_name=self.power_name,
+                    phase=phase_name,
+                    response_type="phase_result_diary",
+                    raw_input_prompt=prompt,
+                    raw_response=raw_response,
+                    success=success_status,
+                )
+        except Exception as e:
+            logger.error(e)
+            logger.error('!generate_phase_result_diary_entry failed')
+
    def log_state(self, prefix=""):
        logger.debug(f"[{self.power_name}] {prefix} State: Goals={self.goals}, Relationships={self.relationships}")

--- a/ai_diplomacy/clients.py
+++ b/ai_diplomacy/clients.py
@ -1039,7 +1039,7 @@ class OpenRouterClient(BaseModelClient):

        logger.debug(f"[{self.model_name}] Initialized OpenRouter client")

-    async def generate_response(self, prompt: str, temperature: float = 0.5, inject_random_seed: bool = True) -> str:
+    async def generate_response(self, prompt: str, temperature: float = 0.0, inject_random_seed: bool = True) -> str:
        """Generate a response using OpenRouter with robust error handling."""
        try:
            # Append the call to action to the user's prompt
--- a/ai_diplomacy/diary_logic.py
+++ b/ai_diplomacy/diary_logic.py
@ -11,49 +11,90 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

-
 async def run_diary_consolidation(
    agent: "DiplomacyAgent",
    game: "Game",
    log_file_path: str,
-    entries_to_keep_unsummarized: int = 6,
+    years_to_keep_unsummarised: int = 1,
    prompts_dir: Optional[str] = None,
 ):
    """
    Consolidate older diary entries while keeping recent ones.
-    This is the logic moved from the DiplomacyAgent class.
+
+    Parameters
+    ----------
+    agent : DiplomacyAgent
+    game  : Game
+    log_file_path : str
+    years_to_keep_unsummarised : int, default 1
+        Number of *distinct years* whose entries remain verbatim.
+    prompts_dir : Optional[str]
    """
-    logger.info(f"[{agent.power_name}] CONSOLIDATION START — {len(agent.full_private_diary)} total full entries")
+    logger.info(
+        f"[{agent.power_name}] CONSOLIDATION START — "
+        f"{len(agent.full_private_diary)} total full entries"
+    )

-    full_entries = [e for e in agent.full_private_diary if not e.startswith("[CONSOLIDATED HISTORY]")]
+    # Remove any earlier consolidated block first
+    full_entries = [
+        e for e in agent.full_private_diary
+        if not e.startswith("[CONSOLIDATED HISTORY]")
+    ]

-    if len(full_entries) <= entries_to_keep_unsummarized:
-        agent.private_diary = list(agent.full_private_diary)
-        logger.info(f"[{agent.power_name}] ≤ {entries_to_keep_unsummarized} full entries — skipping consolidation")
+    if not full_entries:
+        agent.private_diary = []
+        logger.warning(f"[{agent.power_name}] No diary entries found")
        return

-    boundary_entry = full_entries[-entries_to_keep_unsummarized]
-    match = re.search(r"\[[SFWRAB]\s*(\d{4})", boundary_entry)
-    if not match:
-        logger.error(f"[{agent.power_name}] Could not parse year from boundary entry; aborting consolidation")
+    # Extract years by scanning from newest to oldest
+    year_re = re.compile(r"\[[SFWRAB]\s*(\d{4})")  # matches “[S1901”, “[F1902”, …
+    recent_years: list[int] = []
+
+    for entry in reversed(full_entries):            # walk from newest to oldest
+        match = year_re.search(entry)
+        if not match:
+            # Entries without a year tag are “dateless”; skip them while collecting years
+            continue
+        yr = int(match.group(1))
+        if yr not in recent_years:
+            recent_years.append(yr)
+        if len(recent_years) >= years_to_keep_unsummarised:
+            break
+
+    # If every distinct year falls inside the keep-window, skip consolidation
+    all_years = {
+        int(m.group(1))
+        for e in full_entries
+        if (m := year_re.search(e))
+    }
+    if len(all_years - set(recent_years)) == 0:
        agent.private_diary = list(agent.full_private_diary)
+        logger.info(
+            f"[{agent.power_name}] ≤ {years_to_keep_unsummarised} distinct years "
+            "— skipping consolidation"
+        )
        return

-    cutoff_year = int(match.group(1))
-    logger.info(f"[{agent.power_name}] Cut-off year for consolidation: {cutoff_year}")
+    # Partition entries
+    keep_set = set(recent_years)

-    def _entry_year(entry: str) -> int | None:
-        m = re.search(r"\[[SFWRAB]\s*(\d{4})", entry)
+    def _entry_year(entry: str) -> Optional[int]:
+        m = year_re.search(entry)
        return int(m.group(1)) if m else None

-    entries_to_summarize = [e for e in full_entries if (_entry_year(e) is not None and _entry_year(e) < cutoff_year)]
-    entries_to_keep = [e for e in full_entries if (_entry_year(e) is None or _entry_year(e) >= cutoff_year)]
+    entries_to_keep = [e for e in full_entries if (_entry_year(e) in keep_set)]
+    entries_to_summarise = [e for e in full_entries if (_entry_year(e) not in keep_set)]

-    logger.info(f"[{agent.power_name}] Summarising {len(entries_to_summarize)} entries; keeping {len(entries_to_keep)} recent entries verbatim")
+    logger.info(
+        f"[{agent.power_name}] Summarising {len(entries_to_summarise)} entries "
+        f"from years < {min(keep_set)}; keeping {len(entries_to_keep)} recent entries verbatim"
+    )

-    if not entries_to_summarize:
+    if not entries_to_summarise:
        agent.private_diary = list(agent.full_private_diary)
-        logger.warning(f"[{agent.power_name}] No eligible entries to summarise; context diary left unchanged")
+        logger.warning(
+            f"[{agent.power_name}] No eligible entries to summarise; context diary left unchanged"
+        )
        return

    prompt_template = load_prompt("diary_consolidation_prompt.txt", prompts_dir=prompts_dir)
@ -63,7 +104,7 @@ async def run_diary_consolidation(

    prompt = prompt_template.format(
        power_name=agent.power_name,
-        full_diary_text="\n\n".join(entries_to_summarize),
+        full_diary_text="\n\n".join(entries_to_summarise),
    )

    raw_response = ""
@ -71,7 +112,6 @@ async def run_diary_consolidation(
    consolidation_client = None
    try:
        consolidation_client = agent.client
-
        raw_response = await run_llm_and_log(
            client=consolidation_client,
            prompt=prompt,
@ -87,14 +127,21 @@ async def run_diary_consolidation(
        new_summary_entry = f"[CONSOLIDATED HISTORY] {consolidated_text}"
        agent.private_diary = [new_summary_entry] + entries_to_keep
        success_flag = "TRUE"
-        logger.info(f"[{agent.power_name}] Consolidation complete — {len(agent.private_diary)} context entries now")
+        logger.info(
+            f"[{agent.power_name}] Consolidation complete — "
+            f"{len(agent.private_diary)} context entries now"
+        )

    except Exception as exc:
        logger.error(f"[{agent.power_name}] Diary consolidation failed: {exc}", exc_info=True)
    finally:
        log_llm_response(
            log_file_path=log_file_path,
-            model_name=(consolidation_client.model_name if consolidation_client is not None else agent.client.model_name),
+            model_name=(
+                consolidation_client.model_name
+                if consolidation_client is not None
+                else agent.client.model_name
+            ),
            power_name=agent.power_name,
            phase=game.current_short_phase,
            response_type="diary_consolidation",
--- a/ai_diplomacy/game_history.py
+++ b/ai_diplomacy/game_history.py
@ -182,7 +182,7 @@ class GameHistory:
        eng2code = {"AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA", "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR"}
        norm = game.map.norm

-        out_lines = ["**ORDER HISTORY (Recent Rounds)**"]
+        out_lines = []

        for ph in phases_to_report:
            if not (ph.orders_by_power or ph.submitted_orders_by_power):
@ -234,8 +234,14 @@ class GameHistory:
                        tag = "bounce"
                    elif "void" == tag:
                        tag = "void: no effect"
-
-                    out_lines.append(f"    {order} ({tag})")
+                    
+                    # don't show (success) tag for hold moves, it might be causing convergence on
+                    # always-hold behaviour
+                    is_hold = any(kw in order.upper() for kw in (" H", " HOLD"))
+                    if tag == "success" and is_hold:
+                        out_lines.append(f"    {order}")
+                    else:
+                        out_lines.append(f"    {order} ({tag})")
                    seen_ok.add(_norm_keep(order))

                # 2️⃣ invalid submissions
@ -246,6 +252,139 @@ class GameHistory:
            return "\n(No orders were issued in recent history)\n"
        return "\n".join(out_lines)

+    def get_orders_history_for_phase(
+            self,
+            game: "Game",
+            phase_name: str,                      # ← the single phase we want
+        ) -> Dict[str, Dict[str, List[Dict[str, str]]]]:
+        """
+        Return the orders for `phase_name` as:
+
+            {
+                "<POWER>": {
+                    "<order_type>": [
+                        {"order": "<order str>", "result": "<result str>"},
+                        ...
+                    ],
+                    ...
+                },
+                ...
+            }
+
+        Order types: move, hold, support, convoy, retreat, build, disband, waive, other.
+        """
+
+        # ── locate the requested phase ──────────────────────────────
+        target_phase = next((p for p in self.phases if p.name == phase_name), None)
+        if not target_phase or not (target_phase.orders_by_power or target_phase.submitted_orders_by_power):
+            return {}
+
+        # ── helpers ───────────────────────────────────────────────
+        def _scalar(res):
+            """Flatten lists/dicts to a single outcome token."""
+            tag = res
+            while isinstance(tag, list):
+                tag = tag[0] if tag else ""
+            if isinstance(tag, dict):
+                tag = tag.get("outcome") or tag.get("result") or ""
+            return str(tag).strip().lower()
+
+        def _order_type(order: str) -> str:
+            o = order.upper()
+            if o == "WAIVE":
+                return "waive"
+            if " H" in o or " HOLD" in o:
+                return "hold"
+            if " S " in o:
+                return "support"
+            if " C " in o:
+                return "convoy"
+            if " R " in o:
+                return "retreat"
+            if " - " in o:
+                return "move"
+            if " BUILD" in o or o.endswith(" B") or " B " in o:
+                return "build"
+            if " DISBAND" in o or o.endswith(" D") or " D " in o:
+                return "disband"
+            return "other"
+
+        # engine fallback
+        engine_phases = {ph.name: ph for ph in getattr(game, "get_phase_history", lambda: [])()}
+        eng2code = {
+            "AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA",
+            "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR",
+        }
+        norm = game.map.norm
+
+        orders_by_power = defaultdict(lambda: defaultdict(list))
+
+        # iterate powers present in this phase
+        for pwr in sorted(set(target_phase.orders_by_power) | set(target_phase.submitted_orders_by_power)):
+            submitted = target_phase.submitted_orders_by_power.get(pwr, [])
+            accepted  = target_phase.orders_by_power.get(pwr, [])
+
+            if isinstance(submitted, str):
+                submitted = [submitted]
+            if isinstance(accepted, str):
+                accepted = [accepted]
+
+            def _norm_keep(o):
+                return o if o.upper() == "WAIVE" else norm(o)
+
+            sub_norm = {_norm_keep(o): o for o in submitted}
+            acc_norm = {_norm_keep(o): o for o in accepted}
+
+            # outcome source
+            raw_res = target_phase.results_by_power.get(pwr) or target_phase.results_by_power or {}
+            if not raw_res:
+                eng = engine_phases.get(target_phase.name)
+                if eng and hasattr(eng, "order_results"):
+                    key = next((k for k, v in eng2code.items() if v == pwr), None)
+                    raw_res = (eng.order_results or {}).get(key, {})
+
+            seen_ok = set()
+
+            # accepted orders
+            for idx, order in enumerate(accepted):
+                if isinstance(raw_res, dict):
+                    res_raw = raw_res.get(order) or raw_res.get(" ".join(order.split()[:2]))
+                elif isinstance(raw_res, list) and idx < len(raw_res):
+                    res_raw = raw_res[idx]
+                else:
+                    res_raw = ""
+
+                tag = _scalar(res_raw)
+                if not tag or tag == "ok":
+                    tag = "success"
+                elif "bounce" in tag:
+                    tag = "bounce"
+                elif "void" == tag:
+                    tag = "void: no effect"
+
+                result_field = tag
+
+                orders_by_power[pwr][_order_type(order)].append(
+                    {"order": order, "result": result_field}
+                )
+                seen_ok.add(_norm_keep(order))
+
+            # invalid submissions
+            for k in sorted(set(sub_norm) - seen_ok):
+                order_str = sub_norm[k]
+                orders_by_power[pwr][_order_type(order_str)].append(
+                    {"order": order_str, "result": "invalid"}
+                )
+
+        # convert nested defaultdicts to regular dicts
+        return {
+            pwr: {otype: lst for otype, lst in type_map.items()}
+            for pwr, type_map in orders_by_power.items()
+        }
+
+
+
+
    def get_messages_this_round(self, power_name: str, current_phase_name: str) -> str:
        current_phase: Optional[Phase] = None
        for phase_obj in self.phases:
--- a/ai_diplomacy/game_logic.py
+++ b/ai_diplomacy/game_logic.py
@ -133,7 +133,7 @@ def save_game_state(
        if year_val is not None and year_val > run_config.max_year:
            break

-        phase_name = phase_block["name"]
+        phase_name = phase_block["name"]        

        # 3a.  Re-attach anything we cached from a previous save.
        if phase_name in previous_phase_extras:
@ -151,12 +151,15 @@ def save_game_state(
            # -------------------------------------------------------------------
            phase_block["config"] = cfg
            phase_block["state_agents"] = current_state_agents
+            phase_block["order_results"] = game_history.get_orders_history_for_phase(
+                game, completed_phase_name
+            )

    # -------------------------------------------------------------- #
    # 4.  Attach top-level metadata and write atomically.            #
    # -------------------------------------------------------------- #
    saved_game["phase_summaries"] = getattr(game, "phase_summaries", {})
-    saved_game["final_agent_states"] = {p_name: {"relationships": a.relationships, "goals": a.goals} for p_name, a in agents.items()}
+    saved_game["final_agent_states"] = {p_name: {"relationships": a.relationships, "goals": a.goals} for p_name, a in agents.items()}    

    # Filter out phases > max_year
    # saved_game["phases"] = [
@ -210,8 +213,8 @@ def load_game_state(
    last_phase = saved_game_data["phases"][-1]

    # Wipe the data that must be regenerated **but preserve the keys**
-    last_phase["orders"] = {}  # was dict
-    last_phase["results"] = {}  # was dict
+    last_phase["orders"] = {}
+    last_phase["results"] = {}
    last_phase["messages"] = []

    game = from_saved_game_format(saved_game_data)
--- a/ai_diplomacy/initialization.py
+++ b/ai_diplomacy/initialization.py
@ -158,7 +158,7 @@ async def initialize_agent_state_ext(
        # Fallback if LLM data was not applied or parsing failed
        if not initial_goals_applied:
            if not agent.goals:  # Only set defaults if no goals were set during agent construction or by LLM
-                agent.goals = ["Survive and expand", "Form beneficial alliances", "Secure key territories"]
+                agent.goals = []
                agent.add_journal_entry(f"[{current_phase}] Set default initial goals as LLM provided none or parse failed.")
                logger.info(f"[{power_name}] Default goals set.")

@ -180,7 +180,7 @@ async def initialize_agent_state_ext(
        success_status = f"Failure: Exception ({type(e).__name__})"
        # Fallback logic for goals/relationships if not already set by earlier fallbacks
        if not agent.goals:
-            agent.goals = ["Survive and expand", "Form beneficial alliances", "Secure key territories"]
+            agent.goals = []
            logger.info(f"[{power_name}] Set fallback goals after top-level error: {agent.goals}")
        if not agent.relationships or all(r == "Neutral" for r in agent.relationships.values()):
            agent.relationships = {p: "Neutral" for p in ALL_POWERS if p != power_name}
--- a/ai_diplomacy/negotiations.py
+++ b/ai_diplomacy/negotiations.py
@ -31,6 +31,9 @@ async def conduct_negotiations(
    Conducts a round-robin conversation among all non-eliminated powers.
    Each power can send up to 'max_rounds' messages, choosing between private
    and global messages each turn. Uses asyncio for concurrent message generation.
+
+    NEW: Prevents a power from sending a private message to the same recipient
+    in two consecutive rounds if that recipient has not replied yet.
    """
    logger.info("Starting negotiation phase.")

@ -43,6 +46,11 @@ async def conduct_negotiations(
    else:
        logger.info("No eliminated powers yet.")

+    # ── new tracking for consecutive private messages ───────────────
+    last_sent_round: Dict[tuple[str, str], int] = {}
+    awaiting_reply: Dict[tuple[str, str], bool] = {}
+    # ────────────────────────────────────────────────────────────────
+
    # We do up to 'max_rounds' single-message turns for each power
    for round_index in range(max_rounds):
        logger.info(f"Negotiation Round {round_index + 1}/{max_rounds}")
@ -99,14 +107,13 @@ async def conduct_negotiations(

            if isinstance(result, Exception):
                logger.error(f"Error getting conversation reply for {power_name}: {result}", exc_info=result)
-                # Use model_name for stats key if possible
                if model_name in model_error_stats:
                    model_error_stats[model_name]["conversation_errors"] += 1
-                else:  # Fallback to power_name if model name not tracked (shouldn't happen)
+                else:
                    model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0)
                    model_error_stats[power_name]["conversation_errors"] += 1
-                messages = []  # Treat as no messages on error
-            elif result is None:  # Handle case where client might return None on internal error
+                messages = []
+            elif result is None:
                logger.warning(f"Received None instead of messages for {power_name}.")
                messages = []
                if model_name in model_error_stats:
@ -115,48 +122,65 @@ async def conduct_negotiations(
                    model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0)
                    model_error_stats[power_name]["conversation_errors"] += 1
            else:
-                messages = result  # result is the list of message dicts
+                messages = result
                logger.debug(f"Received {len(messages)} message(s) from {power_name}.")

-            # Process the received messages (same logic as before)
-            if messages:
-                for message in messages:
-                    # Validate message structure
-                    if not isinstance(message, dict) or "content" not in message:
-                        logger.warning(f"Invalid message format received from {power_name}: {message}. Skipping.")
-                        continue
-
-                    # Create an official message in the Diplomacy engine
-                    # Determine recipient based on message type
-                    if message.get("message_type") == "private":
-                        recipient = normalize_recipient_name(message.get("recipient", GLOBAL))  # Default to GLOBAL if recipient missing somehow
-                        if recipient not in game.powers and recipient != GLOBAL:
-                            logger.warning(f"Invalid recipient '{recipient}' in message from {power_name}. Sending globally.")
-                            recipient = GLOBAL  # Fallback to GLOBAL if recipient power is invalid
-                    else:  # Assume global if not private or type is missing
-                        recipient = GLOBAL
-
-                    diplo_message = Message(
-                        phase=game.current_short_phase,
-                        sender=power_name,
-                        recipient=recipient,  # Use determined recipient
-                        message=message.get("content", ""),  # Use .get for safety
-                        time_sent=None,  # Let the engine assign time
-                    )
-                    game.add_message(diplo_message)
-                    # Also add to our custom history
-                    game_history.add_message(
-                        game.current_short_phase,
-                        power_name,
-                        recipient,  # Use determined recipient here too
-                        message.get("content", ""),  # Use .get for safety
-                    )
-                    journal_recipient = f"to {recipient}" if recipient != GLOBAL else "globally"
-                    agent.add_journal_entry(f"Sent message {journal_recipient} in {game.current_short_phase}: {message.get('content', '')[:100]}...")
-                    logger.info(f"[{power_name} -> {recipient}] {message.get('content', '')[:100]}...")
-            else:
+            if not messages:
                logger.debug(f"No valid messages returned or error occurred for {power_name}.")
-                # Error stats handled above based on result type
+                continue
+
+            for message in messages:
+                if not isinstance(message, dict) or "content" not in message:
+                    logger.warning(f"Invalid message format received from {power_name}: {message}. Skipping.")
+                    continue
+
+                # Determine recipient
+                if message.get("message_type") == "private":
+                    recipient = normalize_recipient_name(message.get("recipient", GLOBAL))
+                    if recipient not in game.powers and recipient != GLOBAL:
+                        logger.warning(f"Invalid recipient '{recipient}' in message from {power_name}. Sending globally.")
+                        recipient = GLOBAL
+                else:
+                    recipient = GLOBAL
+
+                # ── repetition guard for private messages ─────────────
+                if recipient != GLOBAL:
+                    pair = (power_name, recipient)
+                    if awaiting_reply.get(pair, False) and last_sent_round.get(pair) == round_index - 1:
+                        logger.info(
+                            f"Discarding repeat private message from {power_name} to {recipient} "
+                            f"(waiting for reply since last round)."
+                        )
+                        continue  # skip this message
+
+                    # record outbound and set waiting flag
+                    last_sent_round[pair] = round_index
+                    awaiting_reply[pair] = True
+                    # this outbound message also counts as our reply to the recipient, so clear the waiting flag on the reverse pair
+                    awaiting_reply[(recipient, power_name)] = False
+                # ─────────────────────────────────────────────────────
+
+                diplo_message = Message(
+                    phase=game.current_short_phase,
+                    sender=power_name,
+                    recipient=recipient,
+                    message=message.get("content", ""),
+                    time_sent=None,
+                )
+                game.add_message(diplo_message)
+                game_history.add_message(
+                    game.current_short_phase,
+                    power_name,
+                    recipient,
+                    message.get("content", ""),
+                )
+                journal_recipient = f"to {recipient}" if recipient != GLOBAL else "globally"
+                agent.add_journal_entry(
+                    f"Sent message {journal_recipient} in {game.current_short_phase}: "
+                    f"{message.get('content', '')[:100]}..."
+                )
+                logger.info(f"[{power_name} -> {recipient}] {message.get('content', '')[:100]}...")

    logger.info("Negotiation phase complete.")
    return game_history
+
--- a/ai_diplomacy/prompt_constructor.py
+++ b/ai_diplomacy/prompt_constructor.py
@ -214,14 +214,10 @@ def construct_order_generation_prompt(
        include_messages=not _use_simple,  # include only when *not* simple
    )

-    # Append goals at the end for focus
-    goals_section = ""
-    if agent_goals:
-        goals_section = (
-            "\n\nYOUR STRATEGIC GOALS:\n" + "\n".join(f"- {g}" for g in agent_goals) + "\n\nKeep these goals in mind when choosing your orders."
-        )
+    # delete unused section from context:
+    context = context.replace('Messages This Round\n\n\nEnd Messages', '')

-    final_prompt = system_prompt + "\n\n" + context + "\n\n" + instructions + goals_section
+    final_prompt = system_prompt + "\n\n" + context + "\n\n" + instructions

    # Make the power names more LLM friendly
    final_prompt = (
--- a/ai_diplomacy/prompts/phase_result_diary_prompt.txt
+++ b/ai_diplomacy/prompts/phase_result_diary_prompt.txt
@ -17,9 +17,6 @@ YOUR RELATIONSHIPS BEFORE THIS PHASE
 YOUR GOALS
 {agent_goals}

-YOUR ACTUAL ORDERS
-{your_actual_orders}
-
 TASK
 Analyze what actually happened this phase compared to negotiations and expectations.

--- a/ai_diplomacy/prompts_simple/context_prompt.txt
+++ b/ai_diplomacy/prompts_simple/context_prompt.txt
@ -8,11 +8,11 @@ Phase: {current_phase}
 Note: You can only build units in your home centers if they are empty. If you lose control of a home center, you cannot build units there, so holding them is critical.

 # Player Status
-Current Goals: {agent_goals}
-Relationships: {agent_relationships}
+Current Goals:
+{agent_goals}

-# Recent Private Diary Entries (Your inner thoughts and plans):
-{agent_private_diary}
+# Relationships:
+{agent_relationships}

 # Order History
 {order_history}
@ -28,6 +28,9 @@ Possible Orders For {current_phase}
 {possible_orders}
 End Possible Orders

+# Recent Private Diary Entries (Your inner thoughts and plans):
+{agent_private_diary}
+
 Messages This Round
 {messages_this_round}
 End Messages
--- a/ai_diplomacy/prompts_simple/diary_consolidation_prompt.txt
+++ b/ai_diplomacy/prompts_simple/diary_consolidation_prompt.txt
@ -4,24 +4,15 @@ Your Power: {power_name}
 GAME CONTEXT
 You are playing Diplomacy, a strategic board game set in pre-WWI Europe. Seven powers compete for control by conquering supply centers. Victory requires 18 supply centers.

-Key game mechanics:
- Spring (S) and Fall (F) movement phases where armies/fleets move
- Fall phases include builds/disbands based on supply center control
- Units can support, convoy, or attack
- All orders resolve simultaneously
- Success often requires negotiated coordination with other powers
-
 FULL DIARY HISTORY
 {full_diary_text}

 TASK
-Create a comprehensive consolidated summary of the most important parts of this diary history. It will serve as your long-term memory.
+Create a concise consolidated summary of the most important parts of this diary history. It will serve as your long-term memory. Do not include anything that is not strategically or diplomatically useful going forward. Aim for 300 words.

 Prioritize the following:
-1.  **Recent Events, Goals & Intentions**
-2.  **Long-Term Strategy:** Enduring goals, rivalries, and alliances that are still relevant.
-3.  **Key Historical Events:** Major betrayals, decisive battles, and significant turning points that shape the current diplomatic landscape.
-4.  **Important Notes:** Any notes you deem important from the history not already included.
+1.  **Key Historical Diplomatic Events:** Prioritise both *strategically impactful* and *recent* events.
+2.  **Information that has ongoing importance & usefulness**

 RESPONSE FORMAT
 Return ONLY the consolidated summary text. Do not include JSON, formatting markers, or meta-commentary.
--- a/ai_diplomacy/prompts_simple/negotiation_diary_prompt.txt
+++ b/ai_diplomacy/prompts_simple/negotiation_diary_prompt.txt
@ -2,35 +2,45 @@ NEGOTIATION SUMMARY REQUEST
 Power: {power_name}
 Phase: {current_phase}

-MESSAGES THIS ROUND
-{messages_this_round}
-
-CURRENT STATUS
-Goals:
+Goals (may need updating):
 {agent_goals}

-Relationships:
+Relationships (may need updating):
 {agent_relationships}

 Game State:
 {board_state_str}

+Private Diary:
+{private_diary_summary}
+
+Messages This Round:
+{messages_this_round}
+
+
+
 TASK
 Analyze the negotiations, goals, relationships, and game state to:
-1. Summarize key outcomes and agreements
-2. State your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them.
+1. Summarize key outcomes and agreements concisely
+2. Concisely state your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them.
 3. Update relationships as needed (Enemy, Unfriendly, Neutral, Friendly, Ally)
-4. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary.
+4. Include your latest overarching goals (including any updates)
+5. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary.

 RESPONSE FORMAT
 Return ONLY a JSON object with this structure:

-{
-"negotiation_summary": "Key outcomes from negotiations",
-"intent": "Specific intent for upcoming orders",
-"updated_relationships": {
-"POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally"
-}
-}
+{{
+  "negotiation_summary": "Key outcomes from negotiations",
+  "intent": "Specific intent for upcoming orders this phase",
+  "updated_relationships": {{
+    "POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally"
+  }},
+  "goals": [
+    "goal 1",
+    "goal 2",
+    ...
+  ]
+}}

 Reminder: If you need to quote something, only use single quotes in the actual messages so as not to interfere with the JSON structure.
--- a/ai_diplomacy/prompts_simple/phase_result_diary_prompt.txt
+++ b/ai_diplomacy/prompts_simple/phase_result_diary_prompt.txt
@ -1,7 +1,13 @@
 PHASE RESULT ANALYSIS
-Power: {power_name}
+Your Power: {power_name}
 Phase: {current_phase}

+RECENT DIARY ENTRIES
+{formatted_diary}
+
+BOARD STATE
+{board_state}
+
 PHASE SUMMARY
 {phase_summary}

@ -17,9 +23,6 @@ YOUR RELATIONSHIPS BEFORE THIS PHASE
 YOUR GOALS
 {agent_goals}

-YOUR ACTUAL ORDERS
-{your_actual_orders}
-
 TASK
 Analyze what actually happened this phase compared to negotiations and expectations.

@ -29,12 +32,12 @@ Consider:
 3. SURPRISES: What unexpected moves occurred?
 4. IMPACT: How did these events affect your strategic position?

-Write a reflective diary entry (150-250 words) that:
- Identifies key betrayals or successful collaborations
- Assesses impact on your position
- Updates your understanding of other powers' trustworthiness
- Notes strategic lessons learned
- Adjusts your perception of threats and opportunities
+Write a concise diary entry (100-150 words) of the most important things you would like to remember, e.g.:
+- Key betrayals or successful collaborations
+- Assess impact on your position
+- Update your understanding of other powers' trustworthiness
+- Strategic lessons learned
+- Moves that failed, and ideas on how to avoid the error in the future

 Focus on concrete events and their implications for your future strategy.

--- a/analysis/statistical_game_analysis.py
+++ b/analysis/statistical_game_analysis.py
@ -69,6 +69,12 @@ class StatisticalGameAnalyzer:
        'order_generation', 'order_diary', 'state_update_parsing_empty_or_invalid_data',
        'diary_consolidation', 'state_update_partial_data', 'state_update_no_response'
    ]
+
+    ORDER_TYPES = [
+        "move", "hold", "support", "convoy",
+        "build", "disband", "waive", "other",
+        "retreat"
+    ]
    
    def __init__(self):
        """Initialize analyzer with configuration constants."""
@ -234,6 +240,103 @@ class StatisticalGameAnalyzer:
                
        return responses
    
+    def _extract_order_results_features(self, power: str, phase_data: dict) -> dict:
+        """
+        Count orders and outcomes for a single power in one phase and add
+        a success-rate (0-1) for every order type.
+        """
+        features: dict[str, float | int] = {}
+        for ot in self.ORDER_TYPES:
+            plural = f"{ot}s" if not ot.endswith("s") else ot
+            for metric in ("total", "success", "bounce", "void", "invalid"):
+                features[f"orders_{plural}_{metric}"] = 0
+            features[f"orders_{plural}_success_rate"] = 0.0      # ← new
+
+        orders_by_type = phase_data.get("order_results", {}).get(power, {})
+        if not orders_by_type:
+            return features
+
+        for otype, order_list in orders_by_type.items():
+            otype = otype.lower()
+            if otype not in self.ORDER_TYPES:
+                otype = "other"
+            plural = f"{otype}s" if not otype.endswith("s") else otype
+
+            for entry in order_list:
+                result = str(entry.get("result", "")).lower().strip()
+                key_base = f"orders_{plural}"
+                features[f"{key_base}_total"] += 1
+                match result:
+                    case "success":
+                        features[f"{key_base}_success"] += 1
+                    case "bounce":
+                        features[f"{key_base}_bounce"] += 1
+                    case "invalid":
+                        features[f"{key_base}_invalid"] += 1
+                    case _ if result in ("void", "void: no effect", ""):
+                        features[f"{key_base}_void"] += 1
+
+        # ── derive success rates ──
+        for ot in self.ORDER_TYPES:
+            plural = f"{ot}s" if not ot.endswith("s") else ot
+            succ = features[f"orders_{plural}_success"]
+            tot  = features[f"orders_{plural}_total"]
+            features[f"orders_{plural}_success_rate"] = succ / tot if tot else 0.0
+
+        return features
+
+
+
+    # ────────────────── GAME-LEVEL ORDER TOTALS ──────────────────
+    def _aggregate_order_results(self, power: str, game_data: dict) -> dict:
+        """
+        Sum every order-type/result pair over *all* phases for one power
+        and add success-rate (0-1) columns.
+        """
+        totals: dict[str, float | int] = {}
+        for ot in self.ORDER_TYPES:
+            plural = f"{ot}s" if not ot.endswith("s") else ot
+            for metric in ("total", "success", "bounce", "void", "invalid"):
+                totals[f"orders_{plural}_{metric}"] = 0
+            totals[f"orders_{plural}_success_rate"] = 0.0          # ← new
+
+        for phase in game_data.get("phases", []):
+            orders_by_type = phase.get("order_results", {}).get(power, {})
+            if not orders_by_type:
+                continue
+
+            for otype, order_list in orders_by_type.items():
+                otype = otype.lower()
+                if otype not in self.ORDER_TYPES:
+                    otype = "other"
+                plural = f"{otype}s" if not otype.endswith("s") else otype
+
+                for entry in order_list:
+                    result = str(entry.get("result", "")).lower().strip()
+                    key_base = f"orders_{plural}"
+                    totals[f"{key_base}_total"] += 1
+                    match result:
+                        case "success":
+                            totals[f"{key_base}_success"] += 1
+                        case "bounce":
+                            totals[f"{key_base}_bounce"] += 1
+                        case "invalid":
+                            totals[f"{key_base}_invalid"] += 1
+                        case _ if result in ("void", "void: no effect", ""):
+                            totals[f"{key_base}_void"] += 1
+
+        # ── derive success rates ──
+        for ot in self.ORDER_TYPES:
+            plural = f"{ot}s" if not ot.endswith("s") else ot
+            succ = totals[f"orders_{plural}_success"]
+            tot  = totals[f"orders_{plural}_total"]
+            totals[f"orders_{plural}_success_rate"] = succ / tot if tot else 0.0
+
+        return totals
+
+
+
+
    def _extract_phase_features(self, llm_responses: List[dict], game_data: dict) -> List[dict]:
        """Extract phase-level features for all powers, phases, and response types."""
        phase_features = []
@ -294,6 +397,10 @@ class StatisticalGameAnalyzer:
        # === FAILURE ANALYSIS (HARD MODE) ===
        failure_metrics = self._analyze_failures(power, phase, response_type, llm_responses)
        features.update(failure_metrics)
+
+        # === ORDER-RESULT METRICS ===
+        order_result_features = self._extract_order_results_features(power, phase_data)
+        features.update(order_result_features)
        
        
        # Add response-type specific features
@ -794,7 +901,10 @@ class StatisticalGameAnalyzer:
        if total_calls > 0:
            features['overall_failure_rate_percentage'] = (total_failures / total_calls) * 100.0
            features['overall_success_rate_percentage'] = (total_successes / total_calls) * 100.0
-        
+
+        # === ORDER TOTALS (whole game) ===
+        order_totals = self._aggregate_order_results(power, game_data)
+        features.update(order_totals)
    
    # Helper methods
    
@ -1067,6 +1177,15 @@ class StatisticalGameAnalyzer:
            'military_units_gained_vs_prev_phase',
            'relationships'
        ]
+
+        # ensure order columns
+        for ot in self.ORDER_TYPES:
+            plural = f"{ot}s" if not ot.endswith("s") else ot
+            for suffix in ("total", "success", "bounce", "void", "invalid", "success_rate"):
+                col = f"orders_{plural}_{suffix}"
+                if col not in fieldnames:
+                    fieldnames.append(col)
+
        
        # Ensure all actual fields are included (in case we missed any)
        actual_fields = set()
@ -1140,6 +1259,17 @@ class StatisticalGameAnalyzer:
            # === Diplobench style single scalar game score ===
            'game_score',
        ]
+
+        # ensure order-total columns
+        for ot in self.ORDER_TYPES:
+            plural = f"{ot}s" if not ot.endswith("s") else ot
+            base = f"orders_{plural}_total"
+            for suffix in ("total", "success", "bounce", "void", "invalid", "success_rate"):
+                col = f"orders_{plural}_{suffix}"
+                if col not in fieldnames:
+                    fieldnames.append(col)
+
+
        
        # Ensure all actual fields are included
        actual_fields = set()
--- a/experiment_runner/analysis/statistical_game_analysis.py
+++ b/experiment_runner/analysis/statistical_game_analysis.py
@ -227,6 +227,8 @@ def _plot_relationships_per_game(

        # ── NEW: discard rows with no relationship info ────────────
        game_df = game_df[game_df["rel_dict"].apply(bool)]
+        # ── keep only MOVE phases; drop retreat (R) and adjustment (A) ─────
+        game_df = game_df[game_df["game_phase"].str.upper().str.endswith("M")]
        if game_df.empty:               # nothing left to plot
            continue

@ -334,9 +336,18 @@ def _plot_relationships_per_game(
                        else to_rgba(base_colour, alpha=0.35)
                    )

+                    # ── “double” a lone point so it shows up as a short flat line ──
+                    finite_pts = [(x, y) for x, y in zip(data["x"], y_off) if not math.isnan(y)]
+                    if len(finite_pts) == 1:
+                        x0, y0 = finite_pts[0]
+                        xs = [x0 - 0.05, x0 + 0.05]   # tiny horizontal spread
+                        ys = [y0, y0]
+                    else:
+                        xs, ys = data["x"], y_off
+
                    plt.plot(
-                        data["x"],
-                        y_off,
+                        xs,
+                        ys,
                        label=f"{other} ({kind})",
                        color=colour,
                        linewidth=2,
--- a/experiment_runner/analysis/summary.py
+++ b/experiment_runner/analysis/summary.py
@ -169,6 +169,7 @@ def run(exp_dir: Path, ctx: dict):  # pylint: disable=unused-argument
        sns.set_style("whitegrid")
        plt.figure(figsize=(10, 7))
        sns.boxplot(x="Power", y="SupplyCenters", data=df, palette="pastel")
+        plt.ylim(0, 18)
        plt.title("Supply-center distribution")
        plt.savefig(analysis_dir / "results_summary.png", dpi=150)
        plt.close()
--- a/lm_game.py
+++ b/lm_game.py
@ -334,6 +334,17 @@ async def main():
            if neg_diary_tasks:
                await asyncio.gather(*neg_diary_tasks, return_exceptions=True)

+        # Diary Consolidation
+        if current_short_phase.startswith("S") and current_short_phase.endswith("M"):
+            consolidation_tasks = [
+                run_diary_consolidation(agent, game, llm_log_file_path,
+                                        prompts_dir=agent.prompts_dir)
+                for agent in agents.values()
+                if not game.powers[agent.power_name].is_eliminated()
+            ]
+            if consolidation_tasks:
+                await asyncio.gather(*consolidation_tasks, return_exceptions=True)
+
        # --- 4c. Order Generation ---
        logger.info("Getting orders from agents...")
        board_state = game.get_state()
@ -350,7 +361,7 @@ async def main():
                        game, agent.client, board_state, power_name, possible_orders,
                        game_history, model_error_stats,
                        agent_goals=agent.goals, agent_relationships=agent.relationships,
-                        agent_private_diary_str=agent.format_private_diary_for_prompt(),
+                        agent_private_diary_str=agent.get_latest_phase_diary_entries(), # only include latest phase in orders prompt
                        log_file_path=llm_log_file_path, phase=current_phase,
                    )
                )
@ -378,10 +389,11 @@ async def main():
            submitted_orders_this_phase[p_name] = valid + invalid

            # diary entry only for the orders we tried to submit
-            if valid or invalid:
-                await agents[p_name].generate_order_diary_entry(
-                    game, valid + invalid, llm_log_file_path
-                )
+            if False: # disabled for now
+                if valid or invalid:
+                    await agents[p_name].generate_order_diary_entry(
+                        game, valid + invalid, llm_log_file_path
+                    )
                
        # --- 4d. Process Phase ---
        completed_phase = current_phase
@ -414,26 +426,18 @@ async def main():
        all_orders_this_phase = game.order_history.get(current_short_phase, {})
        
        # Phase Result Diary Entries
-        phase_result_diary_tasks = [
-            agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path)
-            for agent in agents.values() if not game.powers[agent.power_name].is_eliminated()
-        ]
-        if phase_result_diary_tasks:
-            await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True)
-
-        # Diary Consolidation
-        if current_short_phase.startswith("S") and current_short_phase.endswith("M"):
-            consolidation_tasks = [
-                run_diary_consolidation(agent, game, llm_log_file_path,
-                                        prompts_dir=agent.prompts_dir)
-                for agent in agents.values()
-                if not game.powers[agent.power_name].is_eliminated()
+        if current_short_phase.endswith("M"):
+            phase_result_diary_tasks = [
+                agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path, current_short_phase)
+                for agent in agents.values() if not game.powers[agent.power_name].is_eliminated()
            ]
-            if consolidation_tasks:
-                await asyncio.gather(*consolidation_tasks, return_exceptions=True)
+            if phase_result_diary_tasks:
+                await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True)
+
+        

        # Agent State Updates
-        if current_short_phase.endswith("M"):
+        if current_short_phase.endswith("M") and run_config.num_negotiation_rounds == 0: # Relationships are updated during negotiation rounds; in no-press games (zero rounds) they are updated in this separate step.
            current_board_state = game.get_state()
            state_update_tasks = [
                agent.analyze_phase_and_update_state(game, current_board_state, phase_summary, game_history, llm_log_file_path)