state update fixes & streamline prompts

This commit is contained in:
sam-paech 2025-07-12 10:17:17 +10:00
parent 1f154a7073
commit b4a56126ec
17 changed files with 710 additions and 281 deletions

View file

@ -97,6 +97,26 @@ class DiplomacyAgent:
logger.info(f"Initialized DiplomacyAgent for {self.power_name} with goals: {self.goals}") logger.info(f"Initialized DiplomacyAgent for {self.power_name} with goals: {self.goals}")
self.add_journal_entry(f"Agent initialized. Initial Goals: {self.goals}") self.add_journal_entry(f"Agent initialized. Initial Goals: {self.goals}")
def _format_board_state(self, board_state_dict):
units = board_state_dict.get('units', {})
centers = board_state_dict.get('centers', {})
eliminated = {power for power, scs in centers.items() if not scs}
parts = ["Units:"]
for power, unit_list in sorted(units.items()):
label = f"{power} (Eliminated)" if power in eliminated else power
parts.append(f" {label}: {', '.join(unit_list)}")
parts.append("Centers:")
for power, center_list in sorted(centers.items()):
label = f"{power} (Eliminated)" if power in eliminated else power
parts.append(f" {label}: {', '.join(center_list)}")
return "\n".join(parts)
def _extract_json_from_text(self, text: str) -> dict: def _extract_json_from_text(self, text: str) -> dict:
"""Extract and parse JSON from text, handling common LLM response formats.""" """Extract and parse JSON from text, handling common LLM response formats."""
if not text or not text.strip(): if not text or not text.strip():
@ -368,6 +388,46 @@ class DiplomacyAgent:
f"[{self.power_name}] DIARY ENTRY ADDED for {phase}. Total full entries: {len(self.full_private_diary)}. New entry: {entry[:100]}..." f"[{self.power_name}] DIARY ENTRY ADDED for {phase}. Total full entries: {len(self.full_private_diary)}. New entry: {entry[:100]}..."
) )
def get_latest_phase_diary_entries(
    self,
    *,
    use_private_diary: bool = False,
    separator: str = "\n\n",
) -> str:
    """
    Collect the trailing run of diary entries that share the newest phase tag.

    Args:
        use_private_diary: When True read from self.private_diary; when False
            (the default) read from self.full_private_diary.
        separator: Text inserted between consecutive entries in the result.

    Returns:
        The matching entries joined in chronological order. An empty string
        if the chosen diary is empty. If the newest entry does not begin with
        a "[PHASE]" tag, that entry alone is returned unchanged.
    """
    source = self.private_diary if use_private_diary else self.full_private_diary
    if not source:
        return ""

    newest = source[-1]
    # Entries are expected to look like "[S1901M] text..."
    tag_match = re.match(r"\[([^\]]+)\]", newest)
    if tag_match is None:
        # The newest entry carries no phase tag, so there is nothing to group by.
        return newest

    prefix = f"[{tag_match.group(1)}]"

    # Walk backwards until the first entry from a different phase; everything
    # from that point to the end is the contiguous latest-phase run.
    start = len(source)
    while start > 0 and source[start - 1].startswith(prefix):
        start -= 1

    return separator.join(source[start:])
def format_private_diary_for_prompt(self) -> str: def format_private_diary_for_prompt(self) -> str:
""" """
Formats the context diary for inclusion in a prompt. Formats the context diary for inclusion in a prompt.
@ -437,12 +497,12 @@ class DiplomacyAgent:
# Prepare context for the prompt # Prepare context for the prompt
board_state_dict = game.get_state() board_state_dict = game.get_state()
board_state_str = f"Units: {board_state_dict.get('units', {})}, Centers: {board_state_dict.get('centers', {})}" board_state_str = self._format_board_state(board_state_dict)
messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase) messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase)
if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"): if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"):
messages_this_round = ( messages_this_round = (
"(No messages involving your power this round that require deep reflection for diary. Focus on overall situation.)" "(No messages involving your power this round.)"
) )
current_relationships_str = json.dumps(self.relationships) current_relationships_str = json.dumps(self.relationships)
@ -463,31 +523,34 @@ class DiplomacyAgent:
# Do aggressive preprocessing of the template to fix the problematic patterns # Do aggressive preprocessing of the template to fix the problematic patterns
# This includes removing any newlines or whitespace before JSON keys that cause issues # This includes removing any newlines or whitespace before JSON keys that cause issues
for pattern in ["negotiation_summary", "updated_relationships", "relationship_updates", "intent"]: if False:
# Fix the "\n "key"" pattern that breaks .format() for pattern in ["negotiation_summary", "updated_relationships", "relationship_updates", "intent"]:
prompt_template_content = re.sub(rf'\n\s*"{pattern}"', f'"{pattern}"', prompt_template_content) # Fix the "\n "key"" pattern that breaks .format()
prompt_template_content = re.sub(rf'\n\s*"{pattern}"', f'"{pattern}"', prompt_template_content)
# Escape all curly braces in JSON examples to prevent format() from interpreting them # Escape all curly braces in JSON examples to prevent format() from interpreting them
# First, temporarily replace the actual template variables # First, temporarily replace the actual template variables
temp_vars = [
"power_name", temp_vars = [
"current_phase", "power_name",
"messages_this_round", "current_phase",
"agent_goals", "messages_this_round",
"agent_relationships", "agent_goals",
"board_state_str", "agent_relationships",
"ignored_messages_context", "board_state_str",
] "ignored_messages_context",
for var in temp_vars: "private_diary_summary",
prompt_template_content = prompt_template_content.replace(f"{{{var}}}", f"<<{var}>>") ]
for var in temp_vars:
prompt_template_content = prompt_template_content.replace(f"{{{var}}}", f"<<{var}>>")
# Now escape all remaining braces (which should be JSON) # Now escape all remaining braces (which should be JSON)
prompt_template_content = prompt_template_content.replace("{", "{{") prompt_template_content = prompt_template_content.replace("{", "{{")
prompt_template_content = prompt_template_content.replace("}", "}}") prompt_template_content = prompt_template_content.replace("}", "}}")
# Restore the template variables # Restore the template variables
for var in temp_vars: for var in temp_vars:
prompt_template_content = prompt_template_content.replace(f"<<{var}>>", f"{{{var}}}") prompt_template_content = prompt_template_content.replace(f"<<{var}>>", f"{{{var}}}")
# Create a dictionary with safe values for formatting # Create a dictionary with safe values for formatting
format_vars = { format_vars = {
@ -515,8 +578,6 @@ class DiplomacyAgent:
logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...") logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...")
logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...")
raw_response = await run_llm_and_log( raw_response = await run_llm_and_log(
client=self.client, client=self.client,
prompt=full_prompt, prompt=full_prompt,
@ -567,7 +628,6 @@ class DiplomacyAgent:
diary_text_candidate = parsed_data["intent"] diary_text_candidate = parsed_data["intent"]
else: else:
diary_text_candidate += "\nIntent: " + parsed_data["intent"] diary_text_candidate += "\nIntent: " + parsed_data["intent"]
if diary_text_candidate: if diary_text_candidate:
diary_entry_text = diary_text_candidate diary_entry_text = diary_text_candidate
else: else:
@ -610,6 +670,10 @@ class DiplomacyAgent:
elif new_relationships is not None: # It was provided but not a dict elif new_relationships is not None: # It was provided but not a dict
logger.warning(f"[{self.power_name}] 'updated_relationships' from diary LLM was not a dictionary: {type(new_relationships)}") logger.warning(f"[{self.power_name}] 'updated_relationships' from diary LLM was not a dictionary: {type(new_relationships)}")
# update goals
if "goals" in parsed_data:
self.update_goals(parsed_data["goals"])
# Add the generated (or fallback) diary entry # Add the generated (or fallback) diary entry
self.add_diary_entry(diary_entry_text, game.current_short_phase) self.add_diary_entry(diary_entry_text, game.current_short_phase)
if relationships_updated: if relationships_updated:
@ -627,16 +691,19 @@ class DiplomacyAgent:
self.add_diary_entry(f"(Error generating diary entry: {type(e).__name__})", game.current_short_phase) self.add_diary_entry(f"(Error generating diary entry: {type(e).__name__})", game.current_short_phase)
finally: finally:
if log_file_path: # Ensure log_file_path is provided if log_file_path: # Ensure log_file_path is provided
log_llm_response( try:
log_file_path=log_file_path, log_llm_response(
model_name=self.client.model_name if self.client else "UnknownModel", log_file_path=log_file_path,
power_name=self.power_name, model_name=self.client.model_name if self.client else "UnknownModel",
phase=game.current_short_phase if game else "UnknownPhase", power_name=self.power_name,
response_type="negotiation_diary", # Specific type for CSV logging phase=game.current_short_phase if game else "UnknownPhase",
raw_input_prompt=full_prompt, response_type="negotiation_diary", # Specific type for CSV logging
raw_response=raw_response, raw_input_prompt=full_prompt,
success=success_status, raw_response=raw_response,
) success=success_status,
)
except Exception as e:
print(e)
async def generate_order_diary_entry(self, game: "Game", orders: List[str], log_file_path: str): async def generate_order_diary_entry(self, game: "Game", orders: List[str], log_file_path: str):
""" """
@ -783,105 +850,108 @@ class DiplomacyAgent:
# Rest of the code remains the same # Rest of the code remains the same
async def generate_phase_result_diary_entry( async def generate_phase_result_diary_entry(
self, game: "Game", game_history: "GameHistory", phase_summary: str, all_orders: Dict[str, List[str]], log_file_path: str self, game: "Game", game_history: "GameHistory", phase_summary: str, all_orders: Dict[str, List[str]], log_file_path: str, phase_name: str
): ):
"""
Generates a diary entry analyzing the actual phase results,
comparing them to negotiations and identifying betrayals/collaborations.
"""
logger.info(f"[{self.power_name}] Generating phase result diary entry for {game.current_short_phase}...")
# Load the template
prompt_template = load_prompt("phase_result_diary_prompt.txt", prompts_dir=self.prompts_dir)
if not prompt_template:
logger.error(f"[{self.power_name}] Could not load phase_result_diary_prompt.txt. Skipping diary entry.")
return
# Format all orders for the prompt
all_orders_formatted = ""
for power, orders in all_orders.items():
orders_str = ", ".join(orders) if orders else "No orders"
all_orders_formatted += f"{power}: {orders_str}\n"
# Get your own orders
your_orders = all_orders.get(self.power_name, [])
your_orders_str = ", ".join(your_orders) if your_orders else "No orders"
# Get recent negotiations for this phase
messages_this_phase = game_history.get_messages_by_phase(game.current_short_phase)
your_negotiations = ""
for msg in messages_this_phase:
if msg.sender == self.power_name:
your_negotiations += f"To {msg.recipient}: {msg.content}\n"
elif msg.recipient == self.power_name:
your_negotiations += f"From {msg.sender}: {msg.content}\n"
if not your_negotiations:
your_negotiations = "No negotiations this phase"
# Format relationships
relationships_str = "\n".join([f"{p}: {r}" for p, r in self.relationships.items()])
# Format goals
goals_str = "\n".join([f"- {g}" for g in self.goals]) if self.goals else "None"
# Create the prompt
prompt = prompt_template.format(
power_name=self.power_name,
current_phase=game.current_short_phase,
phase_summary=phase_summary,
all_orders_formatted=all_orders_formatted,
your_negotiations=your_negotiations,
pre_phase_relationships=relationships_str,
agent_goals=goals_str,
your_actual_orders=your_orders_str,
)
logger.debug(f"[{self.power_name}] Phase result diary prompt:\n{prompt[:500]}...")
raw_response = ""
success_status = "FALSE"
try: try:
raw_response = await run_llm_and_log( """
client=self.client, Generates a diary entry analyzing the actual phase results,
prompt=prompt, comparing them to negotiations and identifying betrayals/collaborations.
"""
logger.info(f"[{self.power_name}] Generating phase result diary entry for {game.current_short_phase}...")
# Load the template
prompt_template = load_prompt("phase_result_diary_prompt.txt", prompts_dir=self.prompts_dir)
if not prompt_template:
logger.error(f"[{self.power_name}] Could not load phase_result_diary_prompt.txt. Skipping diary entry.")
return
# Format all orders for the prompt
all_orders_formatted = game_history.get_order_history_for_prompt(
game=game, # Pass the game object for normalization
power_name=self.power_name, power_name=self.power_name,
phase=game.current_short_phase, current_phase_name=game.current_short_phase,
response_type="phase_result_diary", num_movement_phases_to_show=1,
) )
if raw_response and raw_response.strip(): formatted_diary = self.format_private_diary_for_prompt()
# The response should be plain text diary entry
diary_entry = raw_response.strip() board_state_dict = game.get_state()
self.add_diary_entry(diary_entry, game.current_short_phase) board_state_str = self._format_board_state(board_state_dict)
success_status = "TRUE"
logger.info(f"[{self.power_name}] Phase result diary entry generated and added.") # Get recent negotiations for this phase
else: messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase)
fallback_diary = ( if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"):
f"Phase {game.current_short_phase} completed. Orders executed as: {your_orders_str}. (Failed to generate detailed analysis)" messages_this_round = (
"(No messages involving your power this round.)"
) )
self.add_diary_entry(fallback_diary, game.current_short_phase)
logger.warning(f"[{self.power_name}] Empty response from LLM. Added fallback phase result diary.")
success_status = "FALSE"
except Exception as e: # Format relationships
logger.error(f"[{self.power_name}] Error generating phase result diary: {e}", exc_info=True) relationships_str = "\n".join([f"{p}: {r}" for p, r in self.relationships.items()])
fallback_diary = f"Phase {game.current_short_phase} completed. Unable to analyze results due to error."
self.add_diary_entry(fallback_diary, game.current_short_phase) # Format goals
success_status = f"FALSE: {type(e).__name__}" goals_str = "\n".join([f"- {g}" for g in self.goals]) if self.goals else "None"
finally:
log_llm_response( # Create the prompt
log_file_path=log_file_path, prompt = prompt_template.format(
model_name=self.client.model_name,
power_name=self.power_name, power_name=self.power_name,
phase=game.current_short_phase, current_phase=phase_name,
response_type="phase_result_diary", phase_summary=phase_summary,
raw_input_prompt=prompt, all_orders_formatted=all_orders_formatted,
raw_response=raw_response, your_negotiations=messages_this_round,
success=success_status, pre_phase_relationships=relationships_str,
agent_goals=goals_str,
formatted_diary=formatted_diary,
board_state=board_state_str,
) )
logger.debug(f"[{self.power_name}] Phase result diary prompt:\n{prompt[:500]}...")
raw_response = ""
success_status = "FALSE"
try:
raw_response = await run_llm_and_log(
client=self.client,
prompt=prompt,
power_name=self.power_name,
phase=phase_name,
response_type="phase_result_diary",
)
if raw_response and raw_response.strip():
# The response should be plain text diary entry
diary_entry = raw_response.strip()
self.add_diary_entry(diary_entry, phase_name)
success_status = "TRUE"
logger.info(f"[{self.power_name}] Phase result diary entry generated and added.")
else:
fallback_diary = (
f"Phase {phase_name} completed."
)
self.add_diary_entry(fallback_diary, phase_name)
logger.warning(f"[{self.power_name}] Empty response from LLM. Added fallback phase result diary.")
success_status = "FALSE"
except Exception as e:
logger.error(f"[{self.power_name}] Error generating phase result diary: {e}", exc_info=True)
fallback_diary = f"Phase {phase_name} completed. Unable to analyze results due to error."
self.add_diary_entry(fallback_diary, phase_name)
success_status = f"FALSE: {type(e).__name__}"
finally:
log_llm_response(
log_file_path=log_file_path,
model_name=self.client.model_name,
power_name=self.power_name,
phase=phase_name,
response_type="phase_result_diary",
raw_input_prompt=prompt,
raw_response=raw_response,
success=success_status,
)
except Exception as e:
logger.error(e)
logger.error('!generate_phase_result_diary_entry failed')
def log_state(self, prefix=""): def log_state(self, prefix=""):
logger.debug(f"[{self.power_name}] {prefix} State: Goals={self.goals}, Relationships={self.relationships}") logger.debug(f"[{self.power_name}] {prefix} State: Goals={self.goals}, Relationships={self.relationships}")

View file

@ -1039,7 +1039,7 @@ class OpenRouterClient(BaseModelClient):
logger.debug(f"[{self.model_name}] Initialized OpenRouter client") logger.debug(f"[{self.model_name}] Initialized OpenRouter client")
async def generate_response(self, prompt: str, temperature: float = 0.5, inject_random_seed: bool = True) -> str: async def generate_response(self, prompt: str, temperature: float = 0.0, inject_random_seed: bool = True) -> str:
"""Generate a response using OpenRouter with robust error handling.""" """Generate a response using OpenRouter with robust error handling."""
try: try:
# Append the call to action to the user's prompt # Append the call to action to the user's prompt

View file

@ -11,49 +11,90 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
async def run_diary_consolidation( async def run_diary_consolidation(
agent: "DiplomacyAgent", agent: "DiplomacyAgent",
game: "Game", game: "Game",
log_file_path: str, log_file_path: str,
entries_to_keep_unsummarized: int = 6, years_to_keep_unsummarised: int = 1,
prompts_dir: Optional[str] = None, prompts_dir: Optional[str] = None,
): ):
""" """
Consolidate older diary entries while keeping recent ones. Consolidate older diary entries while keeping recent ones.
This is the logic moved from the DiplomacyAgent class.
Parameters
----------
agent : DiplomacyAgent
game : Game
log_file_path : str
years_to_keep_unsummarised : int, default 1
Number of *distinct years* whose entries remain verbatim.
prompts_dir : Optional[str]
""" """
logger.info(f"[{agent.power_name}] CONSOLIDATION START — {len(agent.full_private_diary)} total full entries") logger.info(
f"[{agent.power_name}] CONSOLIDATION START — "
f"{len(agent.full_private_diary)} total full entries"
)
full_entries = [e for e in agent.full_private_diary if not e.startswith("[CONSOLIDATED HISTORY]")] # Remove any earlier consolidated block first
full_entries = [
e for e in agent.full_private_diary
if not e.startswith("[CONSOLIDATED HISTORY]")
]
if len(full_entries) <= entries_to_keep_unsummarized: if not full_entries:
agent.private_diary = list(agent.full_private_diary) agent.private_diary = []
logger.info(f"[{agent.power_name}] ≤ {entries_to_keep_unsummarized} full entries — skipping consolidation") logger.warning(f"[{agent.power_name}] No diary entries found")
return return
boundary_entry = full_entries[-entries_to_keep_unsummarized] # Extract years by scanning from newest to oldest
match = re.search(r"\[[SFWRAB]\s*(\d{4})", boundary_entry) year_re = re.compile(r"\[[SFWRAB]\s*(\d{4})") # matches “[S1901”, “[F1902”…”
if not match: recent_years: list[int] = []
logger.error(f"[{agent.power_name}] Could not parse year from boundary entry; aborting consolidation")
for entry in reversed(full_entries): # newest last
match = year_re.search(entry)
if not match:
# Lines without a year tag are considered “dateless”; keep them
continue
yr = int(match.group(1))
if yr not in recent_years:
recent_years.append(yr)
if len(recent_years) >= years_to_keep_unsummarised:
break
# If every distinct year falls inside the keep-window, skip consolidation
all_years = {
int(m.group(1))
for e in full_entries
if (m := year_re.search(e))
}
if len(all_years - set(recent_years)) == 0:
agent.private_diary = list(agent.full_private_diary) agent.private_diary = list(agent.full_private_diary)
logger.info(
f"[{agent.power_name}] ≤ {years_to_keep_unsummarised} distinct years "
"— skipping consolidation"
)
return return
cutoff_year = int(match.group(1)) # Partition entries
logger.info(f"[{agent.power_name}] Cut-off year for consolidation: {cutoff_year}") keep_set = set(recent_years)
def _entry_year(entry: str) -> int | None: def _entry_year(entry: str) -> Optional[int]:
m = re.search(r"\[[SFWRAB]\s*(\d{4})", entry) m = year_re.search(entry)
return int(m.group(1)) if m else None return int(m.group(1)) if m else None
entries_to_summarize = [e for e in full_entries if (_entry_year(e) is not None and _entry_year(e) < cutoff_year)] entries_to_keep = [e for e in full_entries if (_entry_year(e) in keep_set)]
entries_to_keep = [e for e in full_entries if (_entry_year(e) is None or _entry_year(e) >= cutoff_year)] entries_to_summarise = [e for e in full_entries if (_entry_year(e) not in keep_set)]
logger.info(f"[{agent.power_name}] Summarising {len(entries_to_summarize)} entries; keeping {len(entries_to_keep)} recent entries verbatim") logger.info(
f"[{agent.power_name}] Summarising {len(entries_to_summarise)} entries "
f"from years < {min(keep_set)}; keeping {len(entries_to_keep)} recent entries verbatim"
)
if not entries_to_summarize: if not entries_to_summarise:
agent.private_diary = list(agent.full_private_diary) agent.private_diary = list(agent.full_private_diary)
logger.warning(f"[{agent.power_name}] No eligible entries to summarise; context diary left unchanged") logger.warning(
f"[{agent.power_name}] No eligible entries to summarise; context diary left unchanged"
)
return return
prompt_template = load_prompt("diary_consolidation_prompt.txt", prompts_dir=prompts_dir) prompt_template = load_prompt("diary_consolidation_prompt.txt", prompts_dir=prompts_dir)
@ -63,7 +104,7 @@ async def run_diary_consolidation(
prompt = prompt_template.format( prompt = prompt_template.format(
power_name=agent.power_name, power_name=agent.power_name,
full_diary_text="\n\n".join(entries_to_summarize), full_diary_text="\n\n".join(entries_to_summarise),
) )
raw_response = "" raw_response = ""
@ -71,7 +112,6 @@ async def run_diary_consolidation(
consolidation_client = None consolidation_client = None
try: try:
consolidation_client = agent.client consolidation_client = agent.client
raw_response = await run_llm_and_log( raw_response = await run_llm_and_log(
client=consolidation_client, client=consolidation_client,
prompt=prompt, prompt=prompt,
@ -87,14 +127,21 @@ async def run_diary_consolidation(
new_summary_entry = f"[CONSOLIDATED HISTORY] {consolidated_text}" new_summary_entry = f"[CONSOLIDATED HISTORY] {consolidated_text}"
agent.private_diary = [new_summary_entry] + entries_to_keep agent.private_diary = [new_summary_entry] + entries_to_keep
success_flag = "TRUE" success_flag = "TRUE"
logger.info(f"[{agent.power_name}] Consolidation complete — {len(agent.private_diary)} context entries now") logger.info(
f"[{agent.power_name}] Consolidation complete — "
f"{len(agent.private_diary)} context entries now"
)
except Exception as exc: except Exception as exc:
logger.error(f"[{agent.power_name}] Diary consolidation failed: {exc}", exc_info=True) logger.error(f"[{agent.power_name}] Diary consolidation failed: {exc}", exc_info=True)
finally: finally:
log_llm_response( log_llm_response(
log_file_path=log_file_path, log_file_path=log_file_path,
model_name=(consolidation_client.model_name if consolidation_client is not None else agent.client.model_name), model_name=(
consolidation_client.model_name
if consolidation_client is not None
else agent.client.model_name
),
power_name=agent.power_name, power_name=agent.power_name,
phase=game.current_short_phase, phase=game.current_short_phase,
response_type="diary_consolidation", response_type="diary_consolidation",

View file

@ -182,7 +182,7 @@ class GameHistory:
eng2code = {"AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA", "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR"} eng2code = {"AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA", "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR"}
norm = game.map.norm norm = game.map.norm
out_lines = ["**ORDER HISTORY (Recent Rounds)**"] out_lines = []
for ph in phases_to_report: for ph in phases_to_report:
if not (ph.orders_by_power or ph.submitted_orders_by_power): if not (ph.orders_by_power or ph.submitted_orders_by_power):
@ -234,8 +234,14 @@ class GameHistory:
tag = "bounce" tag = "bounce"
elif "void" == tag: elif "void" == tag:
tag = "void: no effect" tag = "void: no effect"
out_lines.append(f" {order} ({tag})") # don't show (success) tag for hold moves, it might be causing convergence on
# always-hold behaviour
is_hold = any(kw in order.upper() for kw in (" H", " HOLD"))
if tag == "success" and is_hold:
out_lines.append(f" {order}")
else:
out_lines.append(f" {order} ({tag})")
seen_ok.add(_norm_keep(order)) seen_ok.add(_norm_keep(order))
# 2⃣ invalid submissions # 2⃣ invalid submissions
@ -246,6 +252,139 @@ class GameHistory:
return "\n(No orders were issued in recent history)\n" return "\n(No orders were issued in recent history)\n"
return "\n".join(out_lines) return "\n".join(out_lines)
def get_orders_history_for_phase(
    self,
    game: "Game",
    phase_name: str,  # the single phase we want
) -> Dict[str, Dict[str, List[Dict[str, str]]]]:
    """
    Return the orders recorded for `phase_name`, grouped by power and order type.

    Shape of the result:
        {
            "<POWER>": {
                "<order_type>": [
                    {"order": "<order str>", "result": "<result str>"},
                    ...
                ],
                ...
            },
            ...
        }

    Order types: move, hold, support, convoy, retreat, build, disband, waive, other.

    Args:
        game: Object providing `map.norm` (used to normalise order strings)
            and, optionally, `get_phase_history()` as a fallback source of
            results.
        phase_name: Name of the phase to report.

    Returns:
        A plain nested dict as described above. An empty dict if the phase is
        not in `self.phases` or has neither accepted nor submitted orders.
        Accepted orders with no recorded outcome are reported as "success";
        submitted orders that were not accepted are reported as "invalid".
    """
    # ── locate the requested phase ──────────────────────────────
    target_phase = next((p for p in self.phases if p.name == phase_name), None)
    if not target_phase or not (target_phase.orders_by_power or target_phase.submitted_orders_by_power):
        return {}

    # ── helpers ───────────────────────────────────────────────
    def _scalar(res):
        """Flatten lists/dicts to a single lower-cased outcome token."""
        tag = res
        while isinstance(tag, list):
            tag = tag[0] if tag else ""
        if isinstance(tag, dict):
            tag = tag.get("outcome") or tag.get("result") or ""
        if tag is None:
            # A missing result must read as "no outcome", not the literal
            # text "none" that str(None) would produce.
            return ""
        return str(tag).strip().lower()

    def _order_type(order: str) -> str:
        """Classify an order string by its verb token."""
        o = order.upper().strip()
        if o == "WAIVE":
            return "waive"
        # Verb tokens that sit between unit and target are checked first.
        if " S " in o:
            return "support"
        if " C " in o:
            return "convoy"
        if " R " in o:
            return "retreat"
        if " - " in o:
            return "move"
        # Hold is a trailing token. A bare substring test for " H" would also
        # match every province whose code starts with H (HOL, HEL, ...).
        if o.endswith((" H", " HOLD")):
            return "hold"
        if " BUILD" in o or o.endswith(" B") or " B " in o:
            return "build"
        if " DISBAND" in o or o.endswith(" D") or " D " in o:
            return "disband"
        return "other"

    # engine fallback
    engine_phases = {ph.name: ph for ph in getattr(game, "get_phase_history", lambda: [])()}
    eng2code = {
        "AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA",
        "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR",
    }
    norm = game.map.norm

    def _norm_keep(o):
        # WAIVE is kept verbatim instead of being passed to the map normaliser.
        return o if o.upper() == "WAIVE" else norm(o)

    orders_by_power = defaultdict(lambda: defaultdict(list))

    # iterate powers present in this phase
    for pwr in sorted(set(target_phase.orders_by_power) | set(target_phase.submitted_orders_by_power)):
        submitted = target_phase.submitted_orders_by_power.get(pwr, [])
        accepted = target_phase.orders_by_power.get(pwr, [])

        # A single order may have been stored as a bare string.
        if isinstance(submitted, str):
            submitted = [submitted]
        if isinstance(accepted, str):
            accepted = [accepted]

        sub_norm = {_norm_keep(o): o for o in submitted}

        # outcome source
        # NOTE(review): when this power has no entry, the whole
        # results_by_power mapping is used as the lookup table -- confirm
        # that this fallback is intended.
        raw_res = target_phase.results_by_power.get(pwr) or target_phase.results_by_power or {}
        if not raw_res:
            eng = engine_phases.get(target_phase.name)
            if eng and hasattr(eng, "order_results"):
                key = next((k for k, v in eng2code.items() if v == pwr), None)
                raw_res = (eng.order_results or {}).get(key, {})

        seen_ok = set()

        # accepted orders
        for idx, order in enumerate(accepted):
            if isinstance(raw_res, dict):
                # Try the full order string, then just the unit ("A PAR").
                res_raw = raw_res.get(order) or raw_res.get(" ".join(order.split()[:2]))
            elif isinstance(raw_res, list) and idx < len(raw_res):
                res_raw = raw_res[idx]
            else:
                res_raw = ""

            tag = _scalar(res_raw)
            if not tag or tag == "ok":
                tag = "success"
            elif "bounce" in tag:
                tag = "bounce"
            elif "void" == tag:
                tag = "void: no effect"

            orders_by_power[pwr][_order_type(order)].append(
                {"order": order, "result": tag}
            )
            seen_ok.add(_norm_keep(order))

        # invalid submissions: submitted but never accepted
        for k in sorted(set(sub_norm) - seen_ok):
            order_str = sub_norm[k]
            orders_by_power[pwr][_order_type(order_str)].append(
                {"order": order_str, "result": "invalid"}
            )

    # convert nested defaultdicts to regular dicts
    return {
        pwr: dict(type_map)
        for pwr, type_map in orders_by_power.items()
    }
def get_messages_this_round(self, power_name: str, current_phase_name: str) -> str: def get_messages_this_round(self, power_name: str, current_phase_name: str) -> str:
current_phase: Optional[Phase] = None current_phase: Optional[Phase] = None
for phase_obj in self.phases: for phase_obj in self.phases:

View file

@ -133,7 +133,7 @@ def save_game_state(
if year_val is not None and year_val > run_config.max_year: if year_val is not None and year_val > run_config.max_year:
break break
phase_name = phase_block["name"] phase_name = phase_block["name"]
# 3a. Re-attach anything we cached from a previous save. # 3a. Re-attach anything we cached from a previous save.
if phase_name in previous_phase_extras: if phase_name in previous_phase_extras:
@ -151,12 +151,15 @@ def save_game_state(
# ------------------------------------------------------------------- # -------------------------------------------------------------------
phase_block["config"] = cfg phase_block["config"] = cfg
phase_block["state_agents"] = current_state_agents phase_block["state_agents"] = current_state_agents
phase_block["order_results"] = game_history.get_orders_history_for_phase(
game, completed_phase_name
)
# -------------------------------------------------------------- # # -------------------------------------------------------------- #
# 4. Attach top-level metadata and write atomically. # # 4. Attach top-level metadata and write atomically. #
# -------------------------------------------------------------- # # -------------------------------------------------------------- #
saved_game["phase_summaries"] = getattr(game, "phase_summaries", {}) saved_game["phase_summaries"] = getattr(game, "phase_summaries", {})
saved_game["final_agent_states"] = {p_name: {"relationships": a.relationships, "goals": a.goals} for p_name, a in agents.items()} saved_game["final_agent_states"] = {p_name: {"relationships": a.relationships, "goals": a.goals} for p_name, a in agents.items()}
# Filter out phases > max_year # Filter out phases > max_year
# saved_game["phases"] = [ # saved_game["phases"] = [
@ -210,8 +213,8 @@ def load_game_state(
last_phase = saved_game_data["phases"][-1] last_phase = saved_game_data["phases"][-1]
# Wipe the data that must be regenerated **but preserve the keys** # Wipe the data that must be regenerated **but preserve the keys**
last_phase["orders"] = {} # was dict last_phase["orders"] = {}
last_phase["results"] = {} # was dict last_phase["results"] = {}
last_phase["messages"] = [] last_phase["messages"] = []
game = from_saved_game_format(saved_game_data) game = from_saved_game_format(saved_game_data)

View file

@ -158,7 +158,7 @@ async def initialize_agent_state_ext(
# Fallback if LLM data was not applied or parsing failed # Fallback if LLM data was not applied or parsing failed
if not initial_goals_applied: if not initial_goals_applied:
if not agent.goals: # Only set defaults if no goals were set during agent construction or by LLM if not agent.goals: # Only set defaults if no goals were set during agent construction or by LLM
agent.goals = ["Survive and expand", "Form beneficial alliances", "Secure key territories"] agent.goals = []
agent.add_journal_entry(f"[{current_phase}] Set default initial goals as LLM provided none or parse failed.") agent.add_journal_entry(f"[{current_phase}] Set default initial goals as LLM provided none or parse failed.")
logger.info(f"[{power_name}] Default goals set.") logger.info(f"[{power_name}] Default goals set.")
@ -180,7 +180,7 @@ async def initialize_agent_state_ext(
success_status = f"Failure: Exception ({type(e).__name__})" success_status = f"Failure: Exception ({type(e).__name__})"
# Fallback logic for goals/relationships if not already set by earlier fallbacks # Fallback logic for goals/relationships if not already set by earlier fallbacks
if not agent.goals: if not agent.goals:
agent.goals = ["Survive and expand", "Form beneficial alliances", "Secure key territories"] agent.goals = []
logger.info(f"[{power_name}] Set fallback goals after top-level error: {agent.goals}") logger.info(f"[{power_name}] Set fallback goals after top-level error: {agent.goals}")
if not agent.relationships or all(r == "Neutral" for r in agent.relationships.values()): if not agent.relationships or all(r == "Neutral" for r in agent.relationships.values()):
agent.relationships = {p: "Neutral" for p in ALL_POWERS if p != power_name} agent.relationships = {p: "Neutral" for p in ALL_POWERS if p != power_name}

View file

@ -31,6 +31,9 @@ async def conduct_negotiations(
Conducts a round-robin conversation among all non-eliminated powers. Conducts a round-robin conversation among all non-eliminated powers.
Each power can send up to 'max_rounds' messages, choosing between private Each power can send up to 'max_rounds' messages, choosing between private
and global messages each turn. Uses asyncio for concurrent message generation. and global messages each turn. Uses asyncio for concurrent message generation.
NEW: Prevents a power from sending a private message to the same recipient
in two consecutive rounds if that recipient has not replied yet.
""" """
logger.info("Starting negotiation phase.") logger.info("Starting negotiation phase.")
@ -43,6 +46,11 @@ async def conduct_negotiations(
else: else:
logger.info("No eliminated powers yet.") logger.info("No eliminated powers yet.")
# ── new tracking for consecutive private messages ───────────────
last_sent_round: Dict[tuple[str, str], int] = {}
awaiting_reply: Dict[tuple[str, str], bool] = {}
# ────────────────────────────────────────────────────────────────
# We do up to 'max_rounds' single-message turns for each power # We do up to 'max_rounds' single-message turns for each power
for round_index in range(max_rounds): for round_index in range(max_rounds):
logger.info(f"Negotiation Round {round_index + 1}/{max_rounds}") logger.info(f"Negotiation Round {round_index + 1}/{max_rounds}")
@ -99,14 +107,13 @@ async def conduct_negotiations(
if isinstance(result, Exception): if isinstance(result, Exception):
logger.error(f"Error getting conversation reply for {power_name}: {result}", exc_info=result) logger.error(f"Error getting conversation reply for {power_name}: {result}", exc_info=result)
# Use model_name for stats key if possible
if model_name in model_error_stats: if model_name in model_error_stats:
model_error_stats[model_name]["conversation_errors"] += 1 model_error_stats[model_name]["conversation_errors"] += 1
else: # Fallback to power_name if model name not tracked (shouldn't happen) else:
model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0) model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0)
model_error_stats[power_name]["conversation_errors"] += 1 model_error_stats[power_name]["conversation_errors"] += 1
messages = [] # Treat as no messages on error messages = []
elif result is None: # Handle case where client might return None on internal error elif result is None:
logger.warning(f"Received None instead of messages for {power_name}.") logger.warning(f"Received None instead of messages for {power_name}.")
messages = [] messages = []
if model_name in model_error_stats: if model_name in model_error_stats:
@ -115,48 +122,65 @@ async def conduct_negotiations(
model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0) model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0)
model_error_stats[power_name]["conversation_errors"] += 1 model_error_stats[power_name]["conversation_errors"] += 1
else: else:
messages = result # result is the list of message dicts messages = result
logger.debug(f"Received {len(messages)} message(s) from {power_name}.") logger.debug(f"Received {len(messages)} message(s) from {power_name}.")
# Process the received messages (same logic as before) if not messages:
if messages:
for message in messages:
# Validate message structure
if not isinstance(message, dict) or "content" not in message:
logger.warning(f"Invalid message format received from {power_name}: {message}. Skipping.")
continue
# Create an official message in the Diplomacy engine
# Determine recipient based on message type
if message.get("message_type") == "private":
recipient = normalize_recipient_name(message.get("recipient", GLOBAL)) # Default to GLOBAL if recipient missing somehow
if recipient not in game.powers and recipient != GLOBAL:
logger.warning(f"Invalid recipient '{recipient}' in message from {power_name}. Sending globally.")
recipient = GLOBAL # Fallback to GLOBAL if recipient power is invalid
else: # Assume global if not private or type is missing
recipient = GLOBAL
diplo_message = Message(
phase=game.current_short_phase,
sender=power_name,
recipient=recipient, # Use determined recipient
message=message.get("content", ""), # Use .get for safety
time_sent=None, # Let the engine assign time
)
game.add_message(diplo_message)
# Also add to our custom history
game_history.add_message(
game.current_short_phase,
power_name,
recipient, # Use determined recipient here too
message.get("content", ""), # Use .get for safety
)
journal_recipient = f"to {recipient}" if recipient != GLOBAL else "globally"
agent.add_journal_entry(f"Sent message {journal_recipient} in {game.current_short_phase}: {message.get('content', '')[:100]}...")
logger.info(f"[{power_name} -> {recipient}] {message.get('content', '')[:100]}...")
else:
logger.debug(f"No valid messages returned or error occurred for {power_name}.") logger.debug(f"No valid messages returned or error occurred for {power_name}.")
# Error stats handled above based on result type continue
for message in messages:
if not isinstance(message, dict) or "content" not in message:
logger.warning(f"Invalid message format received from {power_name}: {message}. Skipping.")
continue
# Determine recipient
if message.get("message_type") == "private":
recipient = normalize_recipient_name(message.get("recipient", GLOBAL))
if recipient not in game.powers and recipient != GLOBAL:
logger.warning(f"Invalid recipient '{recipient}' in message from {power_name}. Sending globally.")
recipient = GLOBAL
else:
recipient = GLOBAL
# ── repetition guard for private messages ─────────────
if recipient != GLOBAL:
pair = (power_name, recipient)
if awaiting_reply.get(pair, False) and last_sent_round.get(pair) == round_index - 1:
logger.info(
f"Discarding repeat private message from {power_name} to {recipient} "
f"(waiting for reply since last round)."
)
continue # skip this message
# record outbound and set waiting flag
last_sent_round[pair] = round_index
awaiting_reply[pair] = True
# recipient has now been contacted; when they respond, we'll clear the flag for the reverse pair
awaiting_reply[(recipient, power_name)] = False
# ─────────────────────────────────────────────────────
diplo_message = Message(
phase=game.current_short_phase,
sender=power_name,
recipient=recipient,
message=message.get("content", ""),
time_sent=None,
)
game.add_message(diplo_message)
game_history.add_message(
game.current_short_phase,
power_name,
recipient,
message.get("content", ""),
)
journal_recipient = f"to {recipient}" if recipient != GLOBAL else "globally"
agent.add_journal_entry(
f"Sent message {journal_recipient} in {game.current_short_phase}: "
f"{message.get('content', '')[:100]}..."
)
logger.info(f"[{power_name} -> {recipient}] {message.get('content', '')[:100]}...")
logger.info("Negotiation phase complete.") logger.info("Negotiation phase complete.")
return game_history return game_history

View file

@ -214,14 +214,10 @@ def construct_order_generation_prompt(
include_messages=not _use_simple, # include only when *not* simple include_messages=not _use_simple, # include only when *not* simple
) )
# Append goals at the end for focus # delete unused section from context:
goals_section = "" context = context.replace('Messages This Round\n\n\nEnd Messages', '')
if agent_goals:
goals_section = (
"\n\nYOUR STRATEGIC GOALS:\n" + "\n".join(f"- {g}" for g in agent_goals) + "\n\nKeep these goals in mind when choosing your orders."
)
final_prompt = system_prompt + "\n\n" + context + "\n\n" + instructions + goals_section final_prompt = system_prompt + "\n\n" + context + "\n\n" + instructions
# Make the power names more LLM friendly # Make the power names more LLM friendly
final_prompt = ( final_prompt = (

View file

@ -17,9 +17,6 @@ YOUR RELATIONSHIPS BEFORE THIS PHASE
YOUR GOALS YOUR GOALS
{agent_goals} {agent_goals}
YOUR ACTUAL ORDERS
{your_actual_orders}
TASK TASK
Analyze what actually happened this phase compared to negotiations and expectations. Analyze what actually happened this phase compared to negotiations and expectations.

View file

@ -8,11 +8,11 @@ Phase: {current_phase}
Note: You can only build units in your home centers if they are empty. If you lose control of a home center, you cannot build units there, so holding them is critical. Note: You can only build units in your home centers if they are empty. If you lose control of a home center, you cannot build units there, so holding them is critical.
# Player Status # Player Status
Current Goals: {agent_goals} Current Goals:
Relationships: {agent_relationships} {agent_goals}
# Recent Private Diary Entries (Your inner thoughts and plans): # Relationships:
{agent_private_diary} {agent_relationships}
# Order History # Order History
{order_history} {order_history}
@ -28,6 +28,9 @@ Possible Orders For {current_phase}
{possible_orders} {possible_orders}
End Possible Orders End Possible Orders
# Recent Private Diary Entries (Your inner thoughts and plans):
{agent_private_diary}
Messages This Round Messages This Round
{messages_this_round} {messages_this_round}
End Messages End Messages

View file

@ -4,24 +4,15 @@ Your Power: {power_name}
GAME CONTEXT GAME CONTEXT
You are playing Diplomacy, a strategic board game set in pre-WWI Europe. Seven powers compete for control by conquering supply centers. Victory requires 18 supply centers. You are playing Diplomacy, a strategic board game set in pre-WWI Europe. Seven powers compete for control by conquering supply centers. Victory requires 18 supply centers.
Key game mechanics:
- Spring (S) and Fall (F) movement phases where armies/fleets move
- Fall phases include builds/disbands based on supply center control
- Units can support, convoy, or attack
- All orders resolve simultaneously
- Success often requires negotiated coordination with other powers
FULL DIARY HISTORY FULL DIARY HISTORY
{full_diary_text} {full_diary_text}
TASK TASK
Create a comprehensive consolidated summary of the most important parts of this diary history. It will serve as your long-term memory. Create a concise consolidated summary of the most important parts of this diary history. It will serve as your long-term memory. Do not include anything that is not strategically or diplomatically useful going forward. Aim for 300 words.
Prioritize the following: Prioritize the following:
1. **Recent Events, Goals & Intentions** 1. **Key Historical Diplomatic Events:** Prioritise both *strategically impactful* and *recent* events.
2. **Long-Term Strategy:** Enduring goals, rivalries, and alliances that are still relevant. 2. **Information that has ongoing importance & usefulness**
3. **Key Historical Events:** Major betrayals, decisive battles, and significant turning points that shape the current diplomatic landscape.
4. **Important Notes:** Any notes you deem important from the history not already included.
RESPONSE FORMAT RESPONSE FORMAT
Return ONLY the consolidated summary text. Do not include JSON, formatting markers, or meta-commentary. Return ONLY the consolidated summary text. Do not include JSON, formatting markers, or meta-commentary.

View file

@ -2,35 +2,45 @@ NEGOTIATION SUMMARY REQUEST
Power: {power_name} Power: {power_name}
Phase: {current_phase} Phase: {current_phase}
MESSAGES THIS ROUND Goals (may need updating):
{messages_this_round}
CURRENT STATUS
Goals:
{agent_goals} {agent_goals}
Relationships: Relationships (may need updating):
{agent_relationships} {agent_relationships}
Game State: Game State:
{board_state_str} {board_state_str}
Private Diary:
{private_diary_summary}
Messages This Round:
{messages_this_round}
TASK TASK
Analyze the negotiations, goals, relationships, and game state to: Analyze the negotiations, goals, relationships, and game state to:
1. Summarize key outcomes and agreements 1. Summarize key outcomes and agreements concisely
2. State your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them. 2. Concisely state your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them.
3. Update relationships as needed (Enemy, Unfriendly, Neutral, Friendly, Ally) 3. Update relationships as needed (Enemy, Unfriendly, Neutral, Friendly, Ally)
4. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary. 4. Include your latest overarching goals (including any updates)
5. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary.
RESPONSE FORMAT RESPONSE FORMAT
Return ONLY a JSON object with this structure: Return ONLY a JSON object with this structure:
{ {{
"negotiation_summary": "Key outcomes from negotiations", "negotiation_summary": "Key outcomes from negotiations",
"intent": "Specific intent for upcoming orders", "intent": "Specific intent for upcoming orders this phase",
"updated_relationships": { "updated_relationships": {{
"POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally" "POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally"
} }},
} "goals": [
"goal 1",
"goal 2",
...
]
}}
Reminder: If you need to quote something, only use single quotes in the actual messages so as not to interfere with the JSON structure. Reminder: If you need to quote something, only use single quotes in the actual messages so as not to interfere with the JSON structure.

View file

@ -1,7 +1,13 @@
PHASE RESULT ANALYSIS PHASE RESULT ANALYSIS
Power: {power_name} Your Power: {power_name}
Phase: {current_phase} Phase: {current_phase}
RECENT DIARY ENTRIES
{formatted_diary}
BOARD STATE
{board_state}
PHASE SUMMARY PHASE SUMMARY
{phase_summary} {phase_summary}
@ -17,9 +23,6 @@ YOUR RELATIONSHIPS BEFORE THIS PHASE
YOUR GOALS YOUR GOALS
{agent_goals} {agent_goals}
YOUR ACTUAL ORDERS
{your_actual_orders}
TASK TASK
Analyze what actually happened this phase compared to negotiations and expectations. Analyze what actually happened this phase compared to negotiations and expectations.
@ -29,12 +32,12 @@ Consider:
3. SURPRISES: What unexpected moves occurred? 3. SURPRISES: What unexpected moves occurred?
4. IMPACT: How did these events affect your strategic position? 4. IMPACT: How did these events affect your strategic position?
Write a reflective diary entry (150-250 words) that: Write a concise diary entry (100-150 words) of the most important things you would like to remember, e.g.:
- Identifies key betrayals or successful collaborations - Key betrayals or successful collaborations
- Assesses impact on your position - Assess impact on your position
- Updates your understanding of other powers' trustworthiness - Update your understanding of other powers' trustworthiness
- Notes strategic lessons learned - Strategic lessons learned
- Adjusts your perception of threats and opportunities - Moves that failed, and ideas on how to avoid the error in the future
Focus on concrete events and their implications for your future strategy. Focus on concrete events and their implications for your future strategy.

View file

@ -69,6 +69,12 @@ class StatisticalGameAnalyzer:
'order_generation', 'order_diary', 'state_update_parsing_empty_or_invalid_data', 'order_generation', 'order_diary', 'state_update_parsing_empty_or_invalid_data',
'diary_consolidation', 'state_update_partial_data', 'state_update_no_response' 'diary_consolidation', 'state_update_partial_data', 'state_update_no_response'
] ]
ORDER_TYPES = [
"move", "hold", "support", "convoy",
"build", "disband", "waive", "other",
"retreat"
]
def __init__(self): def __init__(self):
"""Initialize analyzer with configuration constants.""" """Initialize analyzer with configuration constants."""
@ -234,6 +240,103 @@ class StatisticalGameAnalyzer:
return responses return responses
def _extract_order_results_features(self, power: str, phase_data: dict) -> dict:
    """
    Count orders and outcomes for a single power in one phase and add
    a success-rate (0-1) for every order type.

    Args:
        power: Key looked up inside ``phase_data["order_results"]``.
        phase_data: One phase record. A missing, ``None`` or empty
            ``order_results`` entry for ``power`` yields all-zero features.

    Returns:
        A dict holding, for every entry of ``self.ORDER_TYPES``, the keys
        ``orders_<type>s_total/success/bounce/void/invalid`` (ints) and
        ``orders_<type>s_success_rate`` (float, 0.0 when there are no orders).
        Order types not listed in ``self.ORDER_TYPES`` are counted as "other".
        A result that matches no known outcome only increments ``total``.
    """
    metrics = ("total", "success", "bounce", "void", "invalid")
    # Outcomes that map one-to-one onto a counter of the same name.
    named_results = ("success", "bounce", "invalid")
    # Outcomes folded into the "void" counter; a missing result counts as void.
    void_results = ("void", "void: no effect", "")

    def key_base(order_type: str) -> str:
        # Pluralise once here instead of repeating the expression at each use.
        plural = order_type if order_type.endswith("s") else f"{order_type}s"
        return f"orders_{plural}"

    features: dict[str, float | int] = {}
    for order_type in self.ORDER_TYPES:
        base = key_base(order_type)
        for metric in metrics:
            features[f"{base}_{metric}"] = 0
        features[f"{base}_success_rate"] = 0.0

    # `or {}` guards against keys that are present but explicitly None.
    orders_by_type = (phase_data.get("order_results") or {}).get(power) or {}

    for raw_type, order_list in orders_by_type.items():
        order_type = str(raw_type).lower()
        if order_type not in self.ORDER_TYPES:
            order_type = "other"
        base = key_base(order_type)

        for entry in order_list or []:
            raw_result = entry.get("result")
            # An explicit None is treated like a missing result (-> void)
            # rather than being stringified to "none" and silently dropped.
            result = "" if raw_result is None else str(raw_result).lower().strip()

            features[f"{base}_total"] += 1
            if result in named_results:
                features[f"{base}_{result}"] += 1
            elif result in void_results:
                features[f"{base}_void"] += 1

    # Derive success rates from the final counts.
    for order_type in self.ORDER_TYPES:
        base = key_base(order_type)
        total = features[f"{base}_total"]
        features[f"{base}_success_rate"] = (
            features[f"{base}_success"] / total if total else 0.0
        )

    return features
# ────────────────── GAME-LEVEL ORDER TOTALS ──────────────────
def _aggregate_order_results(self, power: str, game_data: dict) -> dict:
    """
    Sum every order-type/result pair over *all* phases for one power
    and add success-rate (0-1) columns.

    Args:
        power: Key looked up inside each phase's ``order_results``.
        game_data: Game record whose ``phases`` list is iterated. Phases
            with a missing, ``None`` or empty entry for ``power`` are skipped.

    Returns:
        A dict holding, for every entry of ``self.ORDER_TYPES``, the keys
        ``orders_<type>s_total/success/bounce/void/invalid`` (ints) and
        ``orders_<type>s_success_rate`` (float, 0.0 when there are no orders).
        Order types not listed in ``self.ORDER_TYPES`` are counted as "other".
        A result that matches no known outcome only increments ``total``.
    """
    metrics = ("total", "success", "bounce", "void", "invalid")
    # Outcomes that map one-to-one onto a counter of the same name.
    named_results = ("success", "bounce", "invalid")
    # Outcomes folded into the "void" counter; a missing result counts as void.
    void_results = ("void", "void: no effect", "")

    def key_base(order_type: str) -> str:
        # Pluralise once here instead of repeating the expression at each use.
        plural = order_type if order_type.endswith("s") else f"{order_type}s"
        return f"orders_{plural}"

    totals: dict[str, float | int] = {}
    for order_type in self.ORDER_TYPES:
        base = key_base(order_type)
        for metric in metrics:
            totals[f"{base}_{metric}"] = 0
        totals[f"{base}_success_rate"] = 0.0

    # `or` fallbacks guard against keys that are present but explicitly None.
    for phase in game_data.get("phases") or []:
        orders_by_type = (phase.get("order_results") or {}).get(power) or {}

        for raw_type, order_list in orders_by_type.items():
            order_type = str(raw_type).lower()
            if order_type not in self.ORDER_TYPES:
                order_type = "other"
            base = key_base(order_type)

            for entry in order_list or []:
                raw_result = entry.get("result")
                # An explicit None is treated like a missing result (-> void)
                # rather than being stringified to "none" and silently dropped.
                result = "" if raw_result is None else str(raw_result).lower().strip()

                totals[f"{base}_total"] += 1
                if result in named_results:
                    totals[f"{base}_{result}"] += 1
                elif result in void_results:
                    totals[f"{base}_void"] += 1

    # Derive success rates from the game-wide counts.
    for order_type in self.ORDER_TYPES:
        base = key_base(order_type)
        total = totals[f"{base}_total"]
        totals[f"{base}_success_rate"] = (
            totals[f"{base}_success"] / total if total else 0.0
        )

    return totals
def _extract_phase_features(self, llm_responses: List[dict], game_data: dict) -> List[dict]: def _extract_phase_features(self, llm_responses: List[dict], game_data: dict) -> List[dict]:
"""Extract phase-level features for all powers, phases, and response types.""" """Extract phase-level features for all powers, phases, and response types."""
phase_features = [] phase_features = []
@ -294,6 +397,10 @@ class StatisticalGameAnalyzer:
# === FAILURE ANALYSIS (HARD MODE) === # === FAILURE ANALYSIS (HARD MODE) ===
failure_metrics = self._analyze_failures(power, phase, response_type, llm_responses) failure_metrics = self._analyze_failures(power, phase, response_type, llm_responses)
features.update(failure_metrics) features.update(failure_metrics)
# === ORDER-RESULT METRICS ===
order_result_features = self._extract_order_results_features(power, phase_data)
features.update(order_result_features)
# Add response-type specific features # Add response-type specific features
@ -794,7 +901,10 @@ class StatisticalGameAnalyzer:
if total_calls > 0: if total_calls > 0:
features['overall_failure_rate_percentage'] = (total_failures / total_calls) * 100.0 features['overall_failure_rate_percentage'] = (total_failures / total_calls) * 100.0
features['overall_success_rate_percentage'] = (total_successes / total_calls) * 100.0 features['overall_success_rate_percentage'] = (total_successes / total_calls) * 100.0
# === ORDER TOTALS (whole game) ===
order_totals = self._aggregate_order_results(power, game_data)
features.update(order_totals)
# Helper methods # Helper methods
@ -1067,6 +1177,15 @@ class StatisticalGameAnalyzer:
'military_units_gained_vs_prev_phase', 'military_units_gained_vs_prev_phase',
'relationships' 'relationships'
] ]
# ensure order columns
for ot in self.ORDER_TYPES:
plural = f"{ot}s" if not ot.endswith("s") else ot
for suffix in ("total", "success", "bounce", "void", "invalid", "success_rate"):
col = f"orders_{plural}_{suffix}"
if col not in fieldnames:
fieldnames.append(col)
# Ensure all actual fields are included (in case we missed any) # Ensure all actual fields are included (in case we missed any)
actual_fields = set() actual_fields = set()
@ -1140,6 +1259,17 @@ class StatisticalGameAnalyzer:
# === Diplobench style single scalar game score === # === Diplobench style single scalar game score ===
'game_score', 'game_score',
] ]
# ensure order-total columns
for ot in self.ORDER_TYPES:
plural = f"{ot}s" if not ot.endswith("s") else ot
base = f"orders_{plural}_total"
for suffix in ("total", "success", "bounce", "void", "invalid", "success_rate"):
col = f"orders_{plural}_{suffix}"
if col not in fieldnames:
fieldnames.append(col)
# Ensure all actual fields are included # Ensure all actual fields are included
actual_fields = set() actual_fields = set()

View file

@ -227,6 +227,8 @@ def _plot_relationships_per_game(
# ── NEW: discard rows with no relationship info ──────────── # ── NEW: discard rows with no relationship info ────────────
game_df = game_df[game_df["rel_dict"].apply(bool)] game_df = game_df[game_df["rel_dict"].apply(bool)]
# ── keep only MOVE phases; drop retreat (R) and adjustment (A) ─────
game_df = game_df[game_df["game_phase"].str.upper().str.endswith("M")]
if game_df.empty: # nothing left to plot if game_df.empty: # nothing left to plot
continue continue
@ -334,9 +336,18 @@ def _plot_relationships_per_game(
else to_rgba(base_colour, alpha=0.35) else to_rgba(base_colour, alpha=0.35)
) )
# ── “double” a lone point so it shows up as a short flat line ──
finite_pts = [(x, y) for x, y in zip(data["x"], y_off) if not math.isnan(y)]
if len(finite_pts) == 1:
x0, y0 = finite_pts[0]
xs = [x0 - 0.05, x0 + 0.05] # tiny horizontal spread
ys = [y0, y0]
else:
xs, ys = data["x"], y_off
plt.plot( plt.plot(
data["x"], xs,
y_off, ys,
label=f"{other} ({kind})", label=f"{other} ({kind})",
color=colour, color=colour,
linewidth=2, linewidth=2,

View file

@ -169,6 +169,7 @@ def run(exp_dir: Path, ctx: dict): # pylint: disable=unused-argument
sns.set_style("whitegrid") sns.set_style("whitegrid")
plt.figure(figsize=(10, 7)) plt.figure(figsize=(10, 7))
sns.boxplot(x="Power", y="SupplyCenters", data=df, palette="pastel") sns.boxplot(x="Power", y="SupplyCenters", data=df, palette="pastel")
plt.ylim(0, 18)
plt.title("Supply-center distribution") plt.title("Supply-center distribution")
plt.savefig(analysis_dir / "results_summary.png", dpi=150) plt.savefig(analysis_dir / "results_summary.png", dpi=150)
plt.close() plt.close()

View file

@ -334,6 +334,17 @@ async def main():
if neg_diary_tasks: if neg_diary_tasks:
await asyncio.gather(*neg_diary_tasks, return_exceptions=True) await asyncio.gather(*neg_diary_tasks, return_exceptions=True)
# Diary Consolidation
if current_short_phase.startswith("S") and current_short_phase.endswith("M"):
consolidation_tasks = [
run_diary_consolidation(agent, game, llm_log_file_path,
prompts_dir=agent.prompts_dir)
for agent in agents.values()
if not game.powers[agent.power_name].is_eliminated()
]
if consolidation_tasks:
await asyncio.gather(*consolidation_tasks, return_exceptions=True)
# --- 4c. Order Generation --- # --- 4c. Order Generation ---
logger.info("Getting orders from agents...") logger.info("Getting orders from agents...")
board_state = game.get_state() board_state = game.get_state()
@ -350,7 +361,7 @@ async def main():
game, agent.client, board_state, power_name, possible_orders, game, agent.client, board_state, power_name, possible_orders,
game_history, model_error_stats, game_history, model_error_stats,
agent_goals=agent.goals, agent_relationships=agent.relationships, agent_goals=agent.goals, agent_relationships=agent.relationships,
agent_private_diary_str=agent.format_private_diary_for_prompt(), agent_private_diary_str=agent.get_latest_phase_diary_entries(), # only include latest phase in orders prompt
log_file_path=llm_log_file_path, phase=current_phase, log_file_path=llm_log_file_path, phase=current_phase,
) )
) )
@ -378,10 +389,11 @@ async def main():
submitted_orders_this_phase[p_name] = valid + invalid submitted_orders_this_phase[p_name] = valid + invalid
# diary entry only for the orders we tried to submit # diary entry only for the orders we tried to submit
if valid or invalid: if False: # disabled for now
await agents[p_name].generate_order_diary_entry( if valid or invalid:
game, valid + invalid, llm_log_file_path await agents[p_name].generate_order_diary_entry(
) game, valid + invalid, llm_log_file_path
)
# --- 4d. Process Phase --- # --- 4d. Process Phase ---
completed_phase = current_phase completed_phase = current_phase
@ -414,26 +426,18 @@ async def main():
all_orders_this_phase = game.order_history.get(current_short_phase, {}) all_orders_this_phase = game.order_history.get(current_short_phase, {})
# Phase Result Diary Entries # Phase Result Diary Entries
phase_result_diary_tasks = [ if current_short_phase.endswith("M"):
agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path) phase_result_diary_tasks = [
for agent in agents.values() if not game.powers[agent.power_name].is_eliminated() agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path, current_short_phase)
] for agent in agents.values() if not game.powers[agent.power_name].is_eliminated()
if phase_result_diary_tasks:
await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True)
# Diary Consolidation
if current_short_phase.startswith("S") and current_short_phase.endswith("M"):
consolidation_tasks = [
run_diary_consolidation(agent, game, llm_log_file_path,
prompts_dir=agent.prompts_dir)
for agent in agents.values()
if not game.powers[agent.power_name].is_eliminated()
] ]
if consolidation_tasks: if phase_result_diary_tasks:
await asyncio.gather(*consolidation_tasks, return_exceptions=True) await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True)
# Agent State Updates # Agent State Updates
if current_short_phase.endswith("M"): if current_short_phase.endswith("M") and run_config.num_negotiation_rounds == 0: # r'ships are updated in negotiation round. otherwise in no press, updated in a separate step.
current_board_state = game.get_state() current_board_state = game.get_state()
state_update_tasks = [ state_update_tasks = [
agent.analyze_phase_and_update_state(game, current_board_state, phase_summary, game_history, llm_log_file_path) agent.analyze_phase_and_update_state(game, current_board_state, phase_summary, game_history, llm_log_file_path)