mirror of
https://github.com/GoodStartLabs/AI_Diplomacy.git
synced 2026-04-19 12:58:09 +00:00
state update fixes & streamline prompts
This commit is contained in:
parent
1f154a7073
commit
b4a56126ec
17 changed files with 710 additions and 281 deletions
|
|
@ -97,6 +97,26 @@ class DiplomacyAgent:
|
|||
logger.info(f"Initialized DiplomacyAgent for {self.power_name} with goals: {self.goals}")
|
||||
self.add_journal_entry(f"Agent initialized. Initial Goals: {self.goals}")
|
||||
|
||||
def _format_board_state(self, board_state_dict):
    """Render a board-state dict as a multi-line string for use in prompts.

    Args:
        board_state_dict: Mapping that may contain ``'units'``, ``'centers'``
            and ``'retreats'`` entries, each keyed by power name. A missing
            or ``None`` section (or per-power value) is treated as empty.

    Returns:
        A ``Units:`` section followed by a ``Centers:`` section, one line per
        power, sorted by power name. A power is suffixed with
        ``" (Eliminated)"`` only when it holds no centers, no units and no
        pending retreats.
    """
    units = board_state_dict.get('units') or {}
    centers = board_state_dict.get('centers') or {}
    retreats = board_state_dict.get('retreats') or {}

    # An empty center list alone is not elimination: a power can lose its
    # last center while it still has units on the board (or dislodged units
    # waiting to retreat). Flagging it as eliminated would misreport the
    # board to the model reading this text.
    eliminated = {
        power
        for power, scs in centers.items()
        if not scs and not units.get(power) and not retreats.get(power)
    }

    def _section(title, items_by_power):
        # One header line, then one "<power>: a, b, c" line per power.
        lines = [title]
        for power, items in sorted(items_by_power.items()):
            label = f"{power} (Eliminated)" if power in eliminated else power
            lines.append(f" {label}: {', '.join(items or [])}")
        return lines

    return "\n".join(_section("Units:", units) + _section("Centers:", centers))
|
||||
|
||||
|
||||
|
||||
def _extract_json_from_text(self, text: str) -> dict:
|
||||
"""Extract and parse JSON from text, handling common LLM response formats."""
|
||||
if not text or not text.strip():
|
||||
|
|
@ -368,6 +388,46 @@ class DiplomacyAgent:
|
|||
f"[{self.power_name}] DIARY ENTRY ADDED for {phase}. Total full entries: {len(self.full_private_diary)}. New entry: {entry[:100]}..."
|
||||
)
|
||||
|
||||
def get_latest_phase_diary_entries(
    self,
    *,
    use_private_diary: bool = False,
    separator: str = "\n\n",
) -> str:
    """
    Collect the trailing run of diary entries that share the newest phase tag.

    Args:
        use_private_diary: When True read from self.private_diary; when
                           False (the default) read from
                           self.full_private_diary.
        separator:         Text inserted between the returned entries.

    Returns:
        The entries at the end of the chosen diary whose text starts with the
        same "[PHASE]" tag as the last entry, oldest first, joined by
        `separator`. If the last entry has no leading "[...]" tag it is
        returned on its own; an empty diary yields "".
    """
    source: List[str] = self.private_diary if use_private_diary else self.full_private_diary
    if not source:
        return ""

    newest = source[-1]

    # Entries are expected to look like "[S1901M] text…"
    tag_match = re.match(r"\[([^\]]+)\]", newest)
    if tag_match is None:
        # No phase tag on the final entry, so there is nothing to group by.
        return newest

    prefix = f"[{tag_match.group(1)}]"

    # Walk backwards until the first entry that belongs to another phase;
    # everything from `start` onwards is the latest phase, already in
    # chronological order.
    start = len(source)
    while start > 0 and source[start - 1].startswith(prefix):
        start -= 1

    return separator.join(source[start:])
|
||||
|
||||
def format_private_diary_for_prompt(self) -> str:
|
||||
"""
|
||||
Formats the context diary for inclusion in a prompt.
|
||||
|
|
@ -437,12 +497,12 @@ class DiplomacyAgent:
|
|||
|
||||
# Prepare context for the prompt
|
||||
board_state_dict = game.get_state()
|
||||
board_state_str = f"Units: {board_state_dict.get('units', {})}, Centers: {board_state_dict.get('centers', {})}"
|
||||
board_state_str = self._format_board_state(board_state_dict)
|
||||
|
||||
messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase)
|
||||
if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"):
|
||||
messages_this_round = (
|
||||
"(No messages involving your power this round that require deep reflection for diary. Focus on overall situation.)"
|
||||
"(No messages involving your power this round.)"
|
||||
)
|
||||
|
||||
current_relationships_str = json.dumps(self.relationships)
|
||||
|
|
@ -463,31 +523,34 @@ class DiplomacyAgent:
|
|||
|
||||
# Do aggressive preprocessing of the template to fix the problematic patterns
|
||||
# This includes removing any newlines or whitespace before JSON keys that cause issues
|
||||
for pattern in ["negotiation_summary", "updated_relationships", "relationship_updates", "intent"]:
|
||||
# Fix the "\n "key"" pattern that breaks .format()
|
||||
prompt_template_content = re.sub(rf'\n\s*"{pattern}"', f'"{pattern}"', prompt_template_content)
|
||||
if False:
|
||||
for pattern in ["negotiation_summary", "updated_relationships", "relationship_updates", "intent"]:
|
||||
# Fix the "\n "key"" pattern that breaks .format()
|
||||
prompt_template_content = re.sub(rf'\n\s*"{pattern}"', f'"{pattern}"', prompt_template_content)
|
||||
|
||||
# Escape all curly braces in JSON examples to prevent format() from interpreting them
|
||||
# First, temporarily replace the actual template variables
|
||||
temp_vars = [
|
||||
"power_name",
|
||||
"current_phase",
|
||||
"messages_this_round",
|
||||
"agent_goals",
|
||||
"agent_relationships",
|
||||
"board_state_str",
|
||||
"ignored_messages_context",
|
||||
]
|
||||
for var in temp_vars:
|
||||
prompt_template_content = prompt_template_content.replace(f"{{{var}}}", f"<<{var}>>")
|
||||
# Escape all curly braces in JSON examples to prevent format() from interpreting them
|
||||
# First, temporarily replace the actual template variables
|
||||
|
||||
temp_vars = [
|
||||
"power_name",
|
||||
"current_phase",
|
||||
"messages_this_round",
|
||||
"agent_goals",
|
||||
"agent_relationships",
|
||||
"board_state_str",
|
||||
"ignored_messages_context",
|
||||
"private_diary_summary",
|
||||
]
|
||||
for var in temp_vars:
|
||||
prompt_template_content = prompt_template_content.replace(f"{{{var}}}", f"<<{var}>>")
|
||||
|
||||
# Now escape all remaining braces (which should be JSON)
|
||||
prompt_template_content = prompt_template_content.replace("{", "{{")
|
||||
prompt_template_content = prompt_template_content.replace("}", "}}")
|
||||
# Now escape all remaining braces (which should be JSON)
|
||||
prompt_template_content = prompt_template_content.replace("{", "{{")
|
||||
prompt_template_content = prompt_template_content.replace("}", "}}")
|
||||
|
||||
# Restore the template variables
|
||||
for var in temp_vars:
|
||||
prompt_template_content = prompt_template_content.replace(f"<<{var}>>", f"{{{var}}}")
|
||||
# Restore the template variables
|
||||
for var in temp_vars:
|
||||
prompt_template_content = prompt_template_content.replace(f"<<{var}>>", f"{{{var}}}")
|
||||
|
||||
# Create a dictionary with safe values for formatting
|
||||
format_vars = {
|
||||
|
|
@ -515,8 +578,6 @@ class DiplomacyAgent:
|
|||
|
||||
logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...")
|
||||
|
||||
logger.debug(f"[{self.power_name}] Negotiation diary prompt:\n{full_prompt[:500]}...")
|
||||
|
||||
raw_response = await run_llm_and_log(
|
||||
client=self.client,
|
||||
prompt=full_prompt,
|
||||
|
|
@ -567,7 +628,6 @@ class DiplomacyAgent:
|
|||
diary_text_candidate = parsed_data["intent"]
|
||||
else:
|
||||
diary_text_candidate += "\nIntent: " + parsed_data["intent"]
|
||||
|
||||
if diary_text_candidate:
|
||||
diary_entry_text = diary_text_candidate
|
||||
else:
|
||||
|
|
@ -610,6 +670,10 @@ class DiplomacyAgent:
|
|||
elif new_relationships is not None: # It was provided but not a dict
|
||||
logger.warning(f"[{self.power_name}] 'updated_relationships' from diary LLM was not a dictionary: {type(new_relationships)}")
|
||||
|
||||
# update goals
|
||||
if "goals" in parsed_data:
|
||||
self.update_goals(parsed_data["goals"])
|
||||
|
||||
# Add the generated (or fallback) diary entry
|
||||
self.add_diary_entry(diary_entry_text, game.current_short_phase)
|
||||
if relationships_updated:
|
||||
|
|
@ -627,16 +691,19 @@ class DiplomacyAgent:
|
|||
self.add_diary_entry(f"(Error generating diary entry: {type(e).__name__})", game.current_short_phase)
|
||||
finally:
|
||||
if log_file_path: # Ensure log_file_path is provided
|
||||
log_llm_response(
|
||||
log_file_path=log_file_path,
|
||||
model_name=self.client.model_name if self.client else "UnknownModel",
|
||||
power_name=self.power_name,
|
||||
phase=game.current_short_phase if game else "UnknownPhase",
|
||||
response_type="negotiation_diary", # Specific type for CSV logging
|
||||
raw_input_prompt=full_prompt,
|
||||
raw_response=raw_response,
|
||||
success=success_status,
|
||||
)
|
||||
try:
|
||||
log_llm_response(
|
||||
log_file_path=log_file_path,
|
||||
model_name=self.client.model_name if self.client else "UnknownModel",
|
||||
power_name=self.power_name,
|
||||
phase=game.current_short_phase if game else "UnknownPhase",
|
||||
response_type="negotiation_diary", # Specific type for CSV logging
|
||||
raw_input_prompt=full_prompt,
|
||||
raw_response=raw_response,
|
||||
success=success_status,
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
async def generate_order_diary_entry(self, game: "Game", orders: List[str], log_file_path: str):
|
||||
"""
|
||||
|
|
@ -783,105 +850,108 @@ class DiplomacyAgent:
|
|||
# Rest of the code remains the same
|
||||
|
||||
async def generate_phase_result_diary_entry(
|
||||
self, game: "Game", game_history: "GameHistory", phase_summary: str, all_orders: Dict[str, List[str]], log_file_path: str
|
||||
self, game: "Game", game_history: "GameHistory", phase_summary: str, all_orders: Dict[str, List[str]], log_file_path: str, phase_name: str
|
||||
):
|
||||
"""
|
||||
Generates a diary entry analyzing the actual phase results,
|
||||
comparing them to negotiations and identifying betrayals/collaborations.
|
||||
"""
|
||||
logger.info(f"[{self.power_name}] Generating phase result diary entry for {game.current_short_phase}...")
|
||||
|
||||
# Load the template
|
||||
prompt_template = load_prompt("phase_result_diary_prompt.txt", prompts_dir=self.prompts_dir)
|
||||
if not prompt_template:
|
||||
logger.error(f"[{self.power_name}] Could not load phase_result_diary_prompt.txt. Skipping diary entry.")
|
||||
return
|
||||
|
||||
# Format all orders for the prompt
|
||||
all_orders_formatted = ""
|
||||
for power, orders in all_orders.items():
|
||||
orders_str = ", ".join(orders) if orders else "No orders"
|
||||
all_orders_formatted += f"{power}: {orders_str}\n"
|
||||
|
||||
# Get your own orders
|
||||
your_orders = all_orders.get(self.power_name, [])
|
||||
your_orders_str = ", ".join(your_orders) if your_orders else "No orders"
|
||||
|
||||
# Get recent negotiations for this phase
|
||||
messages_this_phase = game_history.get_messages_by_phase(game.current_short_phase)
|
||||
your_negotiations = ""
|
||||
for msg in messages_this_phase:
|
||||
if msg.sender == self.power_name:
|
||||
your_negotiations += f"To {msg.recipient}: {msg.content}\n"
|
||||
elif msg.recipient == self.power_name:
|
||||
your_negotiations += f"From {msg.sender}: {msg.content}\n"
|
||||
|
||||
if not your_negotiations:
|
||||
your_negotiations = "No negotiations this phase"
|
||||
|
||||
# Format relationships
|
||||
relationships_str = "\n".join([f"{p}: {r}" for p, r in self.relationships.items()])
|
||||
|
||||
# Format goals
|
||||
goals_str = "\n".join([f"- {g}" for g in self.goals]) if self.goals else "None"
|
||||
|
||||
# Create the prompt
|
||||
prompt = prompt_template.format(
|
||||
power_name=self.power_name,
|
||||
current_phase=game.current_short_phase,
|
||||
phase_summary=phase_summary,
|
||||
all_orders_formatted=all_orders_formatted,
|
||||
your_negotiations=your_negotiations,
|
||||
pre_phase_relationships=relationships_str,
|
||||
agent_goals=goals_str,
|
||||
your_actual_orders=your_orders_str,
|
||||
)
|
||||
|
||||
logger.debug(f"[{self.power_name}] Phase result diary prompt:\n{prompt[:500]}...")
|
||||
|
||||
raw_response = ""
|
||||
success_status = "FALSE"
|
||||
|
||||
try:
|
||||
raw_response = await run_llm_and_log(
|
||||
client=self.client,
|
||||
prompt=prompt,
|
||||
"""
|
||||
Generates a diary entry analyzing the actual phase results,
|
||||
comparing them to negotiations and identifying betrayals/collaborations.
|
||||
"""
|
||||
logger.info(f"[{self.power_name}] Generating phase result diary entry for {game.current_short_phase}...")
|
||||
|
||||
# Load the template
|
||||
prompt_template = load_prompt("phase_result_diary_prompt.txt", prompts_dir=self.prompts_dir)
|
||||
if not prompt_template:
|
||||
logger.error(f"[{self.power_name}] Could not load phase_result_diary_prompt.txt. Skipping diary entry.")
|
||||
return
|
||||
|
||||
# Format all orders for the prompt
|
||||
all_orders_formatted = game_history.get_order_history_for_prompt(
|
||||
game=game, # Pass the game object for normalization
|
||||
power_name=self.power_name,
|
||||
phase=game.current_short_phase,
|
||||
response_type="phase_result_diary",
|
||||
current_phase_name=game.current_short_phase,
|
||||
num_movement_phases_to_show=1,
|
||||
)
|
||||
|
||||
if raw_response and raw_response.strip():
|
||||
# The response should be plain text diary entry
|
||||
diary_entry = raw_response.strip()
|
||||
self.add_diary_entry(diary_entry, game.current_short_phase)
|
||||
success_status = "TRUE"
|
||||
logger.info(f"[{self.power_name}] Phase result diary entry generated and added.")
|
||||
else:
|
||||
fallback_diary = (
|
||||
f"Phase {game.current_short_phase} completed. Orders executed as: {your_orders_str}. (Failed to generate detailed analysis)"
|
||||
formatted_diary = self.format_private_diary_for_prompt()
|
||||
|
||||
board_state_dict = game.get_state()
|
||||
board_state_str = self._format_board_state(board_state_dict)
|
||||
|
||||
# Get recent negotiations for this phase
|
||||
messages_this_round = game_history.get_messages_this_round(power_name=self.power_name, current_phase_name=game.current_short_phase)
|
||||
if not messages_this_round.strip() or messages_this_round.startswith("\n(No messages"):
|
||||
messages_this_round = (
|
||||
"(No messages involving your power this round.)"
|
||||
)
|
||||
self.add_diary_entry(fallback_diary, game.current_short_phase)
|
||||
logger.warning(f"[{self.power_name}] Empty response from LLM. Added fallback phase result diary.")
|
||||
success_status = "FALSE"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[{self.power_name}] Error generating phase result diary: {e}", exc_info=True)
|
||||
fallback_diary = f"Phase {game.current_short_phase} completed. Unable to analyze results due to error."
|
||||
self.add_diary_entry(fallback_diary, game.current_short_phase)
|
||||
success_status = f"FALSE: {type(e).__name__}"
|
||||
finally:
|
||||
log_llm_response(
|
||||
log_file_path=log_file_path,
|
||||
model_name=self.client.model_name,
|
||||
# Format relationships
|
||||
relationships_str = "\n".join([f"{p}: {r}" for p, r in self.relationships.items()])
|
||||
|
||||
# Format goals
|
||||
goals_str = "\n".join([f"- {g}" for g in self.goals]) if self.goals else "None"
|
||||
|
||||
# Create the prompt
|
||||
prompt = prompt_template.format(
|
||||
power_name=self.power_name,
|
||||
phase=game.current_short_phase,
|
||||
response_type="phase_result_diary",
|
||||
raw_input_prompt=prompt,
|
||||
raw_response=raw_response,
|
||||
success=success_status,
|
||||
current_phase=phase_name,
|
||||
phase_summary=phase_summary,
|
||||
all_orders_formatted=all_orders_formatted,
|
||||
your_negotiations=messages_this_round,
|
||||
pre_phase_relationships=relationships_str,
|
||||
agent_goals=goals_str,
|
||||
formatted_diary=formatted_diary,
|
||||
board_state=board_state_str,
|
||||
)
|
||||
|
||||
logger.debug(f"[{self.power_name}] Phase result diary prompt:\n{prompt[:500]}...")
|
||||
|
||||
raw_response = ""
|
||||
success_status = "FALSE"
|
||||
|
||||
try:
|
||||
raw_response = await run_llm_and_log(
|
||||
client=self.client,
|
||||
prompt=prompt,
|
||||
power_name=self.power_name,
|
||||
phase=phase_name,
|
||||
response_type="phase_result_diary",
|
||||
)
|
||||
|
||||
if raw_response and raw_response.strip():
|
||||
# The response should be plain text diary entry
|
||||
diary_entry = raw_response.strip()
|
||||
self.add_diary_entry(diary_entry, phase_name)
|
||||
success_status = "TRUE"
|
||||
logger.info(f"[{self.power_name}] Phase result diary entry generated and added.")
|
||||
else:
|
||||
fallback_diary = (
|
||||
f"Phase {phase_name} completed."
|
||||
)
|
||||
self.add_diary_entry(fallback_diary, phase_name)
|
||||
logger.warning(f"[{self.power_name}] Empty response from LLM. Added fallback phase result diary.")
|
||||
success_status = "FALSE"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[{self.power_name}] Error generating phase result diary: {e}", exc_info=True)
|
||||
fallback_diary = f"Phase {phase_name} completed. Unable to analyze results due to error."
|
||||
self.add_diary_entry(fallback_diary, phase_name)
|
||||
success_status = f"FALSE: {type(e).__name__}"
|
||||
finally:
|
||||
log_llm_response(
|
||||
log_file_path=log_file_path,
|
||||
model_name=self.client.model_name,
|
||||
power_name=self.power_name,
|
||||
phase=phase_name,
|
||||
response_type="phase_result_diary",
|
||||
raw_input_prompt=prompt,
|
||||
raw_response=raw_response,
|
||||
success=success_status,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
logger.error('!generate_phase_result_diary_entry failed')
|
||||
|
||||
def log_state(self, prefix=""):
|
||||
logger.debug(f"[{self.power_name}] {prefix} State: Goals={self.goals}, Relationships={self.relationships}")
|
||||
|
||||
|
|
|
|||
|
|
@ -1039,7 +1039,7 @@ class OpenRouterClient(BaseModelClient):
|
|||
|
||||
logger.debug(f"[{self.model_name}] Initialized OpenRouter client")
|
||||
|
||||
async def generate_response(self, prompt: str, temperature: float = 0.5, inject_random_seed: bool = True) -> str:
|
||||
async def generate_response(self, prompt: str, temperature: float = 0.0, inject_random_seed: bool = True) -> str:
|
||||
"""Generate a response using OpenRouter with robust error handling."""
|
||||
try:
|
||||
# Append the call to action to the user's prompt
|
||||
|
|
|
|||
|
|
@ -11,49 +11,90 @@ if TYPE_CHECKING:
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def run_diary_consolidation(
|
||||
agent: "DiplomacyAgent",
|
||||
game: "Game",
|
||||
log_file_path: str,
|
||||
entries_to_keep_unsummarized: int = 6,
|
||||
years_to_keep_unsummarised: int = 1,
|
||||
prompts_dir: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
Consolidate older diary entries while keeping recent ones.
|
||||
This is the logic moved from the DiplomacyAgent class.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
agent : DiplomacyAgent
|
||||
game : Game
|
||||
log_file_path : str
|
||||
years_to_keep_unsummarised : int, default 1
|
||||
Number of *distinct years* whose entries remain verbatim.
|
||||
prompts_dir : Optional[str]
|
||||
"""
|
||||
logger.info(f"[{agent.power_name}] CONSOLIDATION START — {len(agent.full_private_diary)} total full entries")
|
||||
logger.info(
|
||||
f"[{agent.power_name}] CONSOLIDATION START — "
|
||||
f"{len(agent.full_private_diary)} total full entries"
|
||||
)
|
||||
|
||||
full_entries = [e for e in agent.full_private_diary if not e.startswith("[CONSOLIDATED HISTORY]")]
|
||||
# Remove any earlier consolidated block first
|
||||
full_entries = [
|
||||
e for e in agent.full_private_diary
|
||||
if not e.startswith("[CONSOLIDATED HISTORY]")
|
||||
]
|
||||
|
||||
if len(full_entries) <= entries_to_keep_unsummarized:
|
||||
agent.private_diary = list(agent.full_private_diary)
|
||||
logger.info(f"[{agent.power_name}] ≤ {entries_to_keep_unsummarized} full entries — skipping consolidation")
|
||||
if not full_entries:
|
||||
agent.private_diary = []
|
||||
logger.warning(f"[{agent.power_name}] No diary entries found")
|
||||
return
|
||||
|
||||
boundary_entry = full_entries[-entries_to_keep_unsummarized]
|
||||
match = re.search(r"\[[SFWRAB]\s*(\d{4})", boundary_entry)
|
||||
if not match:
|
||||
logger.error(f"[{agent.power_name}] Could not parse year from boundary entry; aborting consolidation")
|
||||
# Extract years by scanning from newest to oldest
|
||||
year_re = re.compile(r"\[[SFWRAB]\s*(\d{4})") # matches “[S1901”, “[F1902”…”
|
||||
recent_years: list[int] = []
|
||||
|
||||
for entry in reversed(full_entries): # newest last
|
||||
match = year_re.search(entry)
|
||||
if not match:
|
||||
# Lines without a year tag are considered “dateless”; keep them
|
||||
continue
|
||||
yr = int(match.group(1))
|
||||
if yr not in recent_years:
|
||||
recent_years.append(yr)
|
||||
if len(recent_years) >= years_to_keep_unsummarised:
|
||||
break
|
||||
|
||||
# If every distinct year falls inside the keep-window, skip consolidation
|
||||
all_years = {
|
||||
int(m.group(1))
|
||||
for e in full_entries
|
||||
if (m := year_re.search(e))
|
||||
}
|
||||
if len(all_years - set(recent_years)) == 0:
|
||||
agent.private_diary = list(agent.full_private_diary)
|
||||
logger.info(
|
||||
f"[{agent.power_name}] ≤ {years_to_keep_unsummarised} distinct years "
|
||||
"— skipping consolidation"
|
||||
)
|
||||
return
|
||||
|
||||
cutoff_year = int(match.group(1))
|
||||
logger.info(f"[{agent.power_name}] Cut-off year for consolidation: {cutoff_year}")
|
||||
# Partition entries
|
||||
keep_set = set(recent_years)
|
||||
|
||||
def _entry_year(entry: str) -> int | None:
|
||||
m = re.search(r"\[[SFWRAB]\s*(\d{4})", entry)
|
||||
def _entry_year(entry: str) -> Optional[int]:
|
||||
m = year_re.search(entry)
|
||||
return int(m.group(1)) if m else None
|
||||
|
||||
entries_to_summarize = [e for e in full_entries if (_entry_year(e) is not None and _entry_year(e) < cutoff_year)]
|
||||
entries_to_keep = [e for e in full_entries if (_entry_year(e) is None or _entry_year(e) >= cutoff_year)]
|
||||
entries_to_keep = [e for e in full_entries if (_entry_year(e) in keep_set)]
|
||||
entries_to_summarise = [e for e in full_entries if (_entry_year(e) not in keep_set)]
|
||||
|
||||
logger.info(f"[{agent.power_name}] Summarising {len(entries_to_summarize)} entries; keeping {len(entries_to_keep)} recent entries verbatim")
|
||||
logger.info(
|
||||
f"[{agent.power_name}] Summarising {len(entries_to_summarise)} entries "
|
||||
f"from years < {min(keep_set)}; keeping {len(entries_to_keep)} recent entries verbatim"
|
||||
)
|
||||
|
||||
if not entries_to_summarize:
|
||||
if not entries_to_summarise:
|
||||
agent.private_diary = list(agent.full_private_diary)
|
||||
logger.warning(f"[{agent.power_name}] No eligible entries to summarise; context diary left unchanged")
|
||||
logger.warning(
|
||||
f"[{agent.power_name}] No eligible entries to summarise; context diary left unchanged"
|
||||
)
|
||||
return
|
||||
|
||||
prompt_template = load_prompt("diary_consolidation_prompt.txt", prompts_dir=prompts_dir)
|
||||
|
|
@ -63,7 +104,7 @@ async def run_diary_consolidation(
|
|||
|
||||
prompt = prompt_template.format(
|
||||
power_name=agent.power_name,
|
||||
full_diary_text="\n\n".join(entries_to_summarize),
|
||||
full_diary_text="\n\n".join(entries_to_summarise),
|
||||
)
|
||||
|
||||
raw_response = ""
|
||||
|
|
@ -71,7 +112,6 @@ async def run_diary_consolidation(
|
|||
consolidation_client = None
|
||||
try:
|
||||
consolidation_client = agent.client
|
||||
|
||||
raw_response = await run_llm_and_log(
|
||||
client=consolidation_client,
|
||||
prompt=prompt,
|
||||
|
|
@ -87,14 +127,21 @@ async def run_diary_consolidation(
|
|||
new_summary_entry = f"[CONSOLIDATED HISTORY] {consolidated_text}"
|
||||
agent.private_diary = [new_summary_entry] + entries_to_keep
|
||||
success_flag = "TRUE"
|
||||
logger.info(f"[{agent.power_name}] Consolidation complete — {len(agent.private_diary)} context entries now")
|
||||
logger.info(
|
||||
f"[{agent.power_name}] Consolidation complete — "
|
||||
f"{len(agent.private_diary)} context entries now"
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
logger.error(f"[{agent.power_name}] Diary consolidation failed: {exc}", exc_info=True)
|
||||
finally:
|
||||
log_llm_response(
|
||||
log_file_path=log_file_path,
|
||||
model_name=(consolidation_client.model_name if consolidation_client is not None else agent.client.model_name),
|
||||
model_name=(
|
||||
consolidation_client.model_name
|
||||
if consolidation_client is not None
|
||||
else agent.client.model_name
|
||||
),
|
||||
power_name=agent.power_name,
|
||||
phase=game.current_short_phase,
|
||||
response_type="diary_consolidation",
|
||||
|
|
|
|||
|
|
@ -182,7 +182,7 @@ class GameHistory:
|
|||
eng2code = {"AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA", "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR"}
|
||||
norm = game.map.norm
|
||||
|
||||
out_lines = ["**ORDER HISTORY (Recent Rounds)**"]
|
||||
out_lines = []
|
||||
|
||||
for ph in phases_to_report:
|
||||
if not (ph.orders_by_power or ph.submitted_orders_by_power):
|
||||
|
|
@ -234,8 +234,14 @@ class GameHistory:
|
|||
tag = "bounce"
|
||||
elif "void" == tag:
|
||||
tag = "void: no effect"
|
||||
|
||||
out_lines.append(f" {order} ({tag})")
|
||||
|
||||
# don't show (success) tag for hold moves, it might be causing convergence on
|
||||
# always-hold behaviour
|
||||
is_hold = any(kw in order.upper() for kw in (" H", " HOLD"))
|
||||
if tag == "success" and is_hold:
|
||||
out_lines.append(f" {order}")
|
||||
else:
|
||||
out_lines.append(f" {order} ({tag})")
|
||||
seen_ok.add(_norm_keep(order))
|
||||
|
||||
# 2️⃣ invalid submissions
|
||||
|
|
@ -246,6 +252,139 @@ class GameHistory:
|
|||
return "\n(No orders were issued in recent history)\n"
|
||||
return "\n".join(out_lines)
|
||||
|
||||
def get_orders_history_for_phase(
    self,
    game: "Game",
    phase_name: str,  # the single phase we want
) -> Dict[str, Dict[str, List[Dict[str, str]]]]:
    """
    Return the orders for `phase_name` as:

    {
        "<POWER>": {
            "<order_type>": [
                {"order": "<order str>", "result": "<result str>"},
                ...
            ],
            ...
        },
        ...
    }

    Order types: move, hold, support, convoy, retreat, build, disband,
    waive, other.

    Accepted orders carry their outcome ("success", "bounce",
    "void: no effect", or the raw lower-cased outcome token). Submitted
    orders that were not accepted are appended with result "invalid".
    An empty dict is returned when the phase is not found in `self.phases`
    or has neither accepted nor submitted orders.
    """
    # ── locate the requested phase ──────────────────────────────
    target_phase = next((p for p in self.phases if p.name == phase_name), None)
    if not target_phase or not (target_phase.orders_by_power or target_phase.submitted_orders_by_power):
        return {}

    # ── helpers ───────────────────────────────────────────────
    verb_to_type = {
        "H": "hold", "HOLD": "hold",
        "S": "support",
        "C": "convoy",
        "R": "retreat",
        "-": "move",
        "B": "build", "BUILD": "build",
        "D": "disband", "DISBAND": "disband",
    }

    def _scalar(res):
        """Flatten lists/dicts to a single outcome token."""
        tag = res
        while isinstance(tag, list):
            tag = tag[0] if tag else ""
        if isinstance(tag, dict):
            tag = tag.get("outcome") or tag.get("result") or ""
        if tag is None:
            # A missing result must read as "no outcome", not the text "none".
            return ""
        return str(tag).strip().lower()

    def _order_type(order: str) -> str:
        tokens = order.upper().split()
        if tokens == ["WAIVE"]:
            return "waive"
        # Classify by the first verb token after the unit type. Matching on
        # whole tokens (not substrings) keeps "A BER - HOL" a move rather
        # than a hold, and "A MUN S F KIE H" a support rather than a hold.
        for tok in tokens[1:]:
            if tok in verb_to_type:
                return verb_to_type[tok]
        return "other"

    def _norm_keep(o):
        # WAIVE is kept verbatim; every other order goes through the map's
        # normaliser so submitted and accepted spellings can be compared.
        return o if o.upper() == "WAIVE" else norm(o)

    # engine fallback
    engine_phases = {ph.name: ph for ph in getattr(game, "get_phase_history", lambda: [])()}
    eng2code = {
        "AUSTRIA": "AUT", "ENGLAND": "ENG", "FRANCE": "FRA",
        "GERMANY": "GER", "ITALY": "ITA", "RUSSIA": "RUS", "TURKEY": "TUR",
    }
    norm = game.map.norm

    orders_by_power: Dict[str, Dict[str, List[Dict[str, str]]]] = {}

    # iterate powers present in this phase
    for pwr in sorted(set(target_phase.orders_by_power) | set(target_phase.submitted_orders_by_power)):
        submitted = target_phase.submitted_orders_by_power.get(pwr, [])
        accepted = target_phase.orders_by_power.get(pwr, [])

        if isinstance(submitted, str):
            submitted = [submitted]
        if isinstance(accepted, str):
            accepted = [accepted]

        sub_norm = {_norm_keep(o): o for o in submitted}

        # outcome source
        # NOTE(review): when this power has no entry the whole
        # results_by_power mapping is used instead; presumably that covers
        # a flat order->result layout — confirm against the Phase type.
        raw_res = target_phase.results_by_power.get(pwr) or target_phase.results_by_power or {}
        if not raw_res:
            eng = engine_phases.get(target_phase.name)
            if eng and hasattr(eng, "order_results"):
                key = next((k for k, v in eng2code.items() if v == pwr), None)
                raw_res = (eng.order_results or {}).get(key, {})

        seen_ok = set()
        by_type = orders_by_power.setdefault(pwr, {})

        # accepted orders
        for idx, order in enumerate(accepted):
            if isinstance(raw_res, dict):
                # Try the full order text first, then just "<unit> <loc>".
                res_raw = raw_res.get(order) or raw_res.get(" ".join(order.split()[:2]))
            elif isinstance(raw_res, list) and idx < len(raw_res):
                res_raw = raw_res[idx]
            else:
                res_raw = ""

            tag = _scalar(res_raw)
            if not tag or tag == "ok":
                tag = "success"
            elif "bounce" in tag:
                tag = "bounce"
            elif "void" == tag:
                tag = "void: no effect"

            by_type.setdefault(_order_type(order), []).append(
                {"order": order, "result": tag}
            )
            seen_ok.add(_norm_keep(order))

        # invalid submissions
        for k in sorted(set(sub_norm) - seen_ok):
            order_str = sub_norm[k]
            by_type.setdefault(_order_type(order_str), []).append(
                {"order": order_str, "result": "invalid"}
            )

        if not by_type:
            # Keep the output free of powers that contributed no orders.
            del orders_by_power[pwr]

    return orders_by_power
|
||||
|
||||
|
||||
|
||||
|
||||
def get_messages_this_round(self, power_name: str, current_phase_name: str) -> str:
|
||||
current_phase: Optional[Phase] = None
|
||||
for phase_obj in self.phases:
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ def save_game_state(
|
|||
if year_val is not None and year_val > run_config.max_year:
|
||||
break
|
||||
|
||||
phase_name = phase_block["name"]
|
||||
phase_name = phase_block["name"]
|
||||
|
||||
# 3a. Re-attach anything we cached from a previous save.
|
||||
if phase_name in previous_phase_extras:
|
||||
|
|
@ -151,12 +151,15 @@ def save_game_state(
|
|||
# -------------------------------------------------------------------
|
||||
phase_block["config"] = cfg
|
||||
phase_block["state_agents"] = current_state_agents
|
||||
phase_block["order_results"] = game_history.get_orders_history_for_phase(
|
||||
game, completed_phase_name
|
||||
)
|
||||
|
||||
# -------------------------------------------------------------- #
|
||||
# 4. Attach top-level metadata and write atomically. #
|
||||
# -------------------------------------------------------------- #
|
||||
saved_game["phase_summaries"] = getattr(game, "phase_summaries", {})
|
||||
saved_game["final_agent_states"] = {p_name: {"relationships": a.relationships, "goals": a.goals} for p_name, a in agents.items()}
|
||||
saved_game["final_agent_states"] = {p_name: {"relationships": a.relationships, "goals": a.goals} for p_name, a in agents.items()}
|
||||
|
||||
# Filter out phases > max_year
|
||||
# saved_game["phases"] = [
|
||||
|
|
@ -210,8 +213,8 @@ def load_game_state(
|
|||
last_phase = saved_game_data["phases"][-1]
|
||||
|
||||
# Wipe the data that must be regenerated **but preserve the keys**
|
||||
last_phase["orders"] = {} # was dict
|
||||
last_phase["results"] = {} # was dict
|
||||
last_phase["orders"] = {}
|
||||
last_phase["results"] = {}
|
||||
last_phase["messages"] = []
|
||||
|
||||
game = from_saved_game_format(saved_game_data)
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ async def initialize_agent_state_ext(
|
|||
# Fallback if LLM data was not applied or parsing failed
|
||||
if not initial_goals_applied:
|
||||
if not agent.goals: # Only set defaults if no goals were set during agent construction or by LLM
|
||||
agent.goals = ["Survive and expand", "Form beneficial alliances", "Secure key territories"]
|
||||
agent.goals = []
|
||||
agent.add_journal_entry(f"[{current_phase}] Set default initial goals as LLM provided none or parse failed.")
|
||||
logger.info(f"[{power_name}] Default goals set.")
|
||||
|
||||
|
|
@ -180,7 +180,7 @@ async def initialize_agent_state_ext(
|
|||
success_status = f"Failure: Exception ({type(e).__name__})"
|
||||
# Fallback logic for goals/relationships if not already set by earlier fallbacks
|
||||
if not agent.goals:
|
||||
agent.goals = ["Survive and expand", "Form beneficial alliances", "Secure key territories"]
|
||||
agent.goals = []
|
||||
logger.info(f"[{power_name}] Set fallback goals after top-level error: {agent.goals}")
|
||||
if not agent.relationships or all(r == "Neutral" for r in agent.relationships.values()):
|
||||
agent.relationships = {p: "Neutral" for p in ALL_POWERS if p != power_name}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,9 @@ async def conduct_negotiations(
|
|||
Conducts a round-robin conversation among all non-eliminated powers.
|
||||
Each power can send up to 'max_rounds' messages, choosing between private
|
||||
and global messages each turn. Uses asyncio for concurrent message generation.
|
||||
|
||||
NEW: Prevents a power from sending a private message to the same recipient
|
||||
in two consecutive rounds if that recipient has not replied yet.
|
||||
"""
|
||||
logger.info("Starting negotiation phase.")
|
||||
|
||||
|
|
@ -43,6 +46,11 @@ async def conduct_negotiations(
|
|||
else:
|
||||
logger.info("No eliminated powers yet.")
|
||||
|
||||
# ── new tracking for consecutive private messages ───────────────
|
||||
last_sent_round: Dict[tuple[str, str], int] = {}
|
||||
awaiting_reply: Dict[tuple[str, str], bool] = {}
|
||||
# ────────────────────────────────────────────────────────────────
|
||||
|
||||
# We do up to 'max_rounds' single-message turns for each power
|
||||
for round_index in range(max_rounds):
|
||||
logger.info(f"Negotiation Round {round_index + 1}/{max_rounds}")
|
||||
|
|
@ -99,14 +107,13 @@ async def conduct_negotiations(
|
|||
|
||||
if isinstance(result, Exception):
|
||||
logger.error(f"Error getting conversation reply for {power_name}: {result}", exc_info=result)
|
||||
# Use model_name for stats key if possible
|
||||
if model_name in model_error_stats:
|
||||
model_error_stats[model_name]["conversation_errors"] += 1
|
||||
else: # Fallback to power_name if model name not tracked (shouldn't happen)
|
||||
else:
|
||||
model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0)
|
||||
model_error_stats[power_name]["conversation_errors"] += 1
|
||||
messages = [] # Treat as no messages on error
|
||||
elif result is None: # Handle case where client might return None on internal error
|
||||
messages = []
|
||||
elif result is None:
|
||||
logger.warning(f"Received None instead of messages for {power_name}.")
|
||||
messages = []
|
||||
if model_name in model_error_stats:
|
||||
|
|
@ -115,48 +122,65 @@ async def conduct_negotiations(
|
|||
model_error_stats.setdefault(power_name, {}).setdefault("conversation_errors", 0)
|
||||
model_error_stats[power_name]["conversation_errors"] += 1
|
||||
else:
|
||||
messages = result # result is the list of message dicts
|
||||
messages = result
|
||||
logger.debug(f"Received {len(messages)} message(s) from {power_name}.")
|
||||
|
||||
# Process the received messages (same logic as before)
|
||||
if messages:
|
||||
for message in messages:
|
||||
# Validate message structure
|
||||
if not isinstance(message, dict) or "content" not in message:
|
||||
logger.warning(f"Invalid message format received from {power_name}: {message}. Skipping.")
|
||||
continue
|
||||
|
||||
# Create an official message in the Diplomacy engine
|
||||
# Determine recipient based on message type
|
||||
if message.get("message_type") == "private":
|
||||
recipient = normalize_recipient_name(message.get("recipient", GLOBAL)) # Default to GLOBAL if recipient missing somehow
|
||||
if recipient not in game.powers and recipient != GLOBAL:
|
||||
logger.warning(f"Invalid recipient '{recipient}' in message from {power_name}. Sending globally.")
|
||||
recipient = GLOBAL # Fallback to GLOBAL if recipient power is invalid
|
||||
else: # Assume global if not private or type is missing
|
||||
recipient = GLOBAL
|
||||
|
||||
diplo_message = Message(
|
||||
phase=game.current_short_phase,
|
||||
sender=power_name,
|
||||
recipient=recipient, # Use determined recipient
|
||||
message=message.get("content", ""), # Use .get for safety
|
||||
time_sent=None, # Let the engine assign time
|
||||
)
|
||||
game.add_message(diplo_message)
|
||||
# Also add to our custom history
|
||||
game_history.add_message(
|
||||
game.current_short_phase,
|
||||
power_name,
|
||||
recipient, # Use determined recipient here too
|
||||
message.get("content", ""), # Use .get for safety
|
||||
)
|
||||
journal_recipient = f"to {recipient}" if recipient != GLOBAL else "globally"
|
||||
agent.add_journal_entry(f"Sent message {journal_recipient} in {game.current_short_phase}: {message.get('content', '')[:100]}...")
|
||||
logger.info(f"[{power_name} -> {recipient}] {message.get('content', '')[:100]}...")
|
||||
else:
|
||||
if not messages:
|
||||
logger.debug(f"No valid messages returned or error occurred for {power_name}.")
|
||||
# Error stats handled above based on result type
|
||||
continue
|
||||
|
||||
for message in messages:
|
||||
if not isinstance(message, dict) or "content" not in message:
|
||||
logger.warning(f"Invalid message format received from {power_name}: {message}. Skipping.")
|
||||
continue
|
||||
|
||||
# Determine recipient
|
||||
if message.get("message_type") == "private":
|
||||
recipient = normalize_recipient_name(message.get("recipient", GLOBAL))
|
||||
if recipient not in game.powers and recipient != GLOBAL:
|
||||
logger.warning(f"Invalid recipient '{recipient}' in message from {power_name}. Sending globally.")
|
||||
recipient = GLOBAL
|
||||
else:
|
||||
recipient = GLOBAL
|
||||
|
||||
# ── repetition guard for private messages ─────────────
|
||||
if recipient != GLOBAL:
|
||||
pair = (power_name, recipient)
|
||||
if awaiting_reply.get(pair, False) and last_sent_round.get(pair) == round_index - 1:
|
||||
logger.info(
|
||||
f"Discarding repeat private message from {power_name} to {recipient} "
|
||||
f"(waiting for reply since last round)."
|
||||
)
|
||||
continue # skip this message
|
||||
|
||||
# record outbound and set waiting flag
|
||||
last_sent_round[pair] = round_index
|
||||
awaiting_reply[pair] = True
|
||||
# recipient has now been contacted; when they respond, we'll clear the flag for the reverse pair
|
||||
awaiting_reply[(recipient, power_name)] = False
|
||||
# ─────────────────────────────────────────────────────
|
||||
|
||||
diplo_message = Message(
|
||||
phase=game.current_short_phase,
|
||||
sender=power_name,
|
||||
recipient=recipient,
|
||||
message=message.get("content", ""),
|
||||
time_sent=None,
|
||||
)
|
||||
game.add_message(diplo_message)
|
||||
game_history.add_message(
|
||||
game.current_short_phase,
|
||||
power_name,
|
||||
recipient,
|
||||
message.get("content", ""),
|
||||
)
|
||||
journal_recipient = f"to {recipient}" if recipient != GLOBAL else "globally"
|
||||
agent.add_journal_entry(
|
||||
f"Sent message {journal_recipient} in {game.current_short_phase}: "
|
||||
f"{message.get('content', '')[:100]}..."
|
||||
)
|
||||
logger.info(f"[{power_name} -> {recipient}] {message.get('content', '')[:100]}...")
|
||||
|
||||
logger.info("Negotiation phase complete.")
|
||||
return game_history
|
||||
|
||||
|
|
|
|||
|
|
@ -214,14 +214,10 @@ def construct_order_generation_prompt(
|
|||
include_messages=not _use_simple, # include only when *not* simple
|
||||
)
|
||||
|
||||
# Append goals at the end for focus
|
||||
goals_section = ""
|
||||
if agent_goals:
|
||||
goals_section = (
|
||||
"\n\nYOUR STRATEGIC GOALS:\n" + "\n".join(f"- {g}" for g in agent_goals) + "\n\nKeep these goals in mind when choosing your orders."
|
||||
)
|
||||
# delete unused section from context:
|
||||
context = context.replace('Messages This Round\n\n\nEnd Messages', '')
|
||||
|
||||
final_prompt = system_prompt + "\n\n" + context + "\n\n" + instructions + goals_section
|
||||
final_prompt = system_prompt + "\n\n" + context + "\n\n" + instructions
|
||||
|
||||
# Make the power names more LLM friendly
|
||||
final_prompt = (
|
||||
|
|
|
|||
|
|
@ -17,9 +17,6 @@ YOUR RELATIONSHIPS BEFORE THIS PHASE
|
|||
YOUR GOALS
|
||||
{agent_goals}
|
||||
|
||||
YOUR ACTUAL ORDERS
|
||||
{your_actual_orders}
|
||||
|
||||
TASK
|
||||
Analyze what actually happened this phase compared to negotiations and expectations.
|
||||
|
||||
|
|
|
|||
|
|
@ -8,11 +8,11 @@ Phase: {current_phase}
|
|||
Note: You can only build units in your home centers if they are empty. If you lose control of a home center, you cannot build units there, so holding them is critical.
|
||||
|
||||
# Player Status
|
||||
Current Goals: {agent_goals}
|
||||
Relationships: {agent_relationships}
|
||||
Current Goals:
|
||||
{agent_goals}
|
||||
|
||||
# Recent Private Diary Entries (Your inner thoughts and plans):
|
||||
{agent_private_diary}
|
||||
# Relationships:
|
||||
{agent_relationships}
|
||||
|
||||
# Order History
|
||||
{order_history}
|
||||
|
|
@ -28,6 +28,9 @@ Possible Orders For {current_phase}
|
|||
{possible_orders}
|
||||
End Possible Orders
|
||||
|
||||
# Recent Private Diary Entries (Your inner thoughts and plans):
|
||||
{agent_private_diary}
|
||||
|
||||
Messages This Round
|
||||
{messages_this_round}
|
||||
End Messages
|
||||
|
|
@ -4,24 +4,15 @@ Your Power: {power_name}
|
|||
GAME CONTEXT
|
||||
You are playing Diplomacy, a strategic board game set in pre-WWI Europe. Seven powers compete for control by conquering supply centers. Victory requires 18 supply centers.
|
||||
|
||||
Key game mechanics:
|
||||
- Spring (S) and Fall (F) movement phases where armies/fleets move
|
||||
- Fall phases include builds/disbands based on supply center control
|
||||
- Units can support, convoy, or attack
|
||||
- All orders resolve simultaneously
|
||||
- Success often requires negotiated coordination with other powers
|
||||
|
||||
FULL DIARY HISTORY
|
||||
{full_diary_text}
|
||||
|
||||
TASK
|
||||
Create a comprehensive consolidated summary of the most important parts of this diary history. It will serve as your long-term memory.
|
||||
Create a concise consolidated summary of the most important parts of this diary history. It will serve as your long-term memory. Do not include anything that is not strategically or diplomatically useful going forward. Aim for 300 words.
|
||||
|
||||
Prioritize the following:
|
||||
1. **Recent Events, Goals & Intentions**
|
||||
2. **Long-Term Strategy:** Enduring goals, rivalries, and alliances that are still relevant.
|
||||
3. **Key Historical Events:** Major betrayals, decisive battles, and significant turning points that shape the current diplomatic landscape.
|
||||
4. **Important Notes:** Any notes you deem important from the history not already included.
|
||||
1. **Key Historical Diplomatic Events:** Prioritise both *strategically impactful* and *recent* events.
|
||||
2. **Information that has ongoing importance & usefulness**
|
||||
|
||||
RESPONSE FORMAT
|
||||
Return ONLY the consolidated summary text. Do not include JSON, formatting markers, or meta-commentary.
|
||||
|
|
@ -2,35 +2,45 @@ NEGOTIATION SUMMARY REQUEST
|
|||
Power: {power_name}
|
||||
Phase: {current_phase}
|
||||
|
||||
MESSAGES THIS ROUND
|
||||
{messages_this_round}
|
||||
|
||||
CURRENT STATUS
|
||||
Goals:
|
||||
Goals (may need updating):
|
||||
{agent_goals}
|
||||
|
||||
Relationships:
|
||||
Relationships (may need updating):
|
||||
{agent_relationships}
|
||||
|
||||
Game State:
|
||||
{board_state_str}
|
||||
|
||||
Private Diary:
|
||||
{private_diary_summary}
|
||||
|
||||
Messages This Round:
|
||||
{messages_this_round}
|
||||
|
||||
|
||||
|
||||
TASK
|
||||
Analyze the negotiations, goals, relationships, and game state to:
|
||||
1. Summarize key outcomes and agreements
|
||||
2. State your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them.
|
||||
1. Summarize key outcomes and agreements concisely
|
||||
2. Concisely state your specific intents for {current_phase}, including moves you have agreed to in negotiations and whether you intend to fulfil them.
|
||||
3. Update relationships as needed (Enemy, Unfriendly, Neutral, Friendly, Ally)
|
||||
4. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary.
|
||||
4. Include your latest overarching goals (including any updates)
|
||||
5. Important: You will not see the full negotiation log in the order decision phase, so you must transmit key information about the negotiations to your future self via this summary.
|
||||
|
||||
RESPONSE FORMAT
|
||||
Return ONLY a JSON object with this structure:
|
||||
|
||||
{
|
||||
"negotiation_summary": "Key outcomes from negotiations",
|
||||
"intent": "Specific intent for upcoming orders",
|
||||
"updated_relationships": {
|
||||
"POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally"
|
||||
}
|
||||
}
|
||||
{{
|
||||
"negotiation_summary": "Key outcomes from negotiations",
|
||||
"intent": "Specific intent for upcoming orders this phase",
|
||||
"updated_relationships": {{
|
||||
"POWER_NAME": "Enemy|Unfriendly|Neutral|Friendly|Ally"
|
||||
}},
|
||||
"goals": [
|
||||
"goal 1",
|
||||
"goal 2",
|
||||
...
|
||||
]
|
||||
}}
|
||||
|
||||
Reminder: If you need to quote something, only use single quotes in the actual messages so as not to interfere with the JSON structure.
|
||||
|
|
@ -1,7 +1,13 @@
|
|||
PHASE RESULT ANALYSIS
|
||||
Power: {power_name}
|
||||
Your Power: {power_name}
|
||||
Phase: {current_phase}
|
||||
|
||||
RECENT DIARY ENTRIES
|
||||
{formatted_diary}
|
||||
|
||||
BOARD STATE
|
||||
{board_state}
|
||||
|
||||
PHASE SUMMARY
|
||||
{phase_summary}
|
||||
|
||||
|
|
@ -17,9 +23,6 @@ YOUR RELATIONSHIPS BEFORE THIS PHASE
|
|||
YOUR GOALS
|
||||
{agent_goals}
|
||||
|
||||
YOUR ACTUAL ORDERS
|
||||
{your_actual_orders}
|
||||
|
||||
TASK
|
||||
Analyze what actually happened this phase compared to negotiations and expectations.
|
||||
|
||||
|
|
@ -29,12 +32,12 @@ Consider:
|
|||
3. SURPRISES: What unexpected moves occurred?
|
||||
4. IMPACT: How did these events affect your strategic position?
|
||||
|
||||
Write a reflective diary entry (150-250 words) that:
|
||||
- Identifies key betrayals or successful collaborations
|
||||
- Assesses impact on your position
|
||||
- Updates your understanding of other powers' trustworthiness
|
||||
- Notes strategic lessons learned
|
||||
- Adjusts your perception of threats and opportunities
|
||||
Write a concise diary entry (100-150 words) of the most important things you would like to remember, e.g.:
|
||||
- Key betrayals or successful collaborations
|
||||
- Assess impact on your position
|
||||
- Update your understanding of other powers' trustworthiness
|
||||
- Strategic lessons learned
|
||||
- Moves that failed, and ideas on how to avoid the error in the future
|
||||
|
||||
Focus on concrete events and their implications for your future strategy.
|
||||
|
||||
|
|
|
|||
|
|
@ -69,6 +69,12 @@ class StatisticalGameAnalyzer:
|
|||
'order_generation', 'order_diary', 'state_update_parsing_empty_or_invalid_data',
|
||||
'diary_consolidation', 'state_update_partial_data', 'state_update_no_response'
|
||||
]
|
||||
|
||||
ORDER_TYPES = [
|
||||
"move", "hold", "support", "convoy",
|
||||
"build", "disband", "waive", "other",
|
||||
"retreat"
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize analyzer with configuration constants."""
|
||||
|
|
@ -234,6 +240,103 @@ class StatisticalGameAnalyzer:
|
|||
|
||||
return responses
|
||||
|
||||
def _extract_order_results_features(self, power: str, phase_data: dict) -> dict:
    """
    Count one power's orders and their outcomes for a single phase.

    For every entry of ``self.ORDER_TYPES`` the returned dict holds the
    columns ``orders_<plural>_{total,success,bounce,void,invalid}`` (ints)
    and ``orders_<plural>_success_rate`` (float in 0-1), where ``<plural>``
    is the order type with an "s" appended unless it already ends in "s".

    Args:
        power: Key looked up inside ``phase_data["order_results"]``.
        phase_data: One phase block; ``order_results`` is read as
            ``{power: {order_type: [{"result": ...}, ...]}}``. A missing or
            null ``order_results`` / power entry / order list is treated as
            "no orders" instead of raising.

    Returns:
        The counter dict described above. All counters are zero when the
        power has no recorded orders in this phase.
    """
    metrics = ("total", "success", "bounce", "void", "invalid")

    # Normalised result label -> counter suffix. A result that is not listed
    # here still increments "<stem>_total" but no outcome counter.
    result_bucket = {
        "success": "success",
        "bounce": "bounce",
        "invalid": "invalid",
        "void": "void",
        "void: no effect": "void",
        "": "void",
    }

    def column_stem(order_type: str) -> str:
        # "move" -> "orders_moves"; a type already ending in "s" is kept as is.
        plural = order_type if order_type.endswith("s") else f"{order_type}s"
        return f"orders_{plural}"

    # Pre-seed every column so each row has the same keys in the same order.
    features: dict[str, float | int] = {}
    for ot in self.ORDER_TYPES:
        stem = column_stem(ot)
        for metric in metrics:
            features[f"{stem}_{metric}"] = 0
        features[f"{stem}_success_rate"] = 0.0

    # `or {}` also covers keys that exist but hold None (e.g. JSON null).
    orders_by_type = (phase_data.get("order_results") or {}).get(power) or {}

    for raw_type, order_list in orders_by_type.items():
        otype = raw_type.lower()
        if otype not in self.ORDER_TYPES:
            # Unknown order types are pooled under the "other" columns.
            otype = "other"
        stem = column_stem(otype)

        for entry in order_list or ():
            result = str(entry.get("result", "")).lower().strip()
            features[f"{stem}_total"] += 1
            bucket = result_bucket.get(result)
            if bucket is not None:
                features[f"{stem}_{bucket}"] += 1

    # Derive success rates; types with no orders keep a rate of 0.0.
    for ot in self.ORDER_TYPES:
        stem = column_stem(ot)
        total = features[f"{stem}_total"]
        features[f"{stem}_success_rate"] = (
            features[f"{stem}_success"] / total if total else 0.0
        )

    return features
|
||||
|
||||
|
||||
|
||||
# ────────────────── GAME-LEVEL ORDER TOTALS ──────────────────
|
||||
def _aggregate_order_results(self, power: str, game_data: dict) -> dict:
    """
    Sum one power's order outcomes over every phase of a game.

    For every entry of ``self.ORDER_TYPES`` the returned dict holds the
    columns ``orders_<plural>_{total,success,bounce,void,invalid}`` (ints)
    and ``orders_<plural>_success_rate`` (float in 0-1), where ``<plural>``
    is the order type with an "s" appended unless it already ends in "s".

    Args:
        power: Key looked up inside each phase's ``order_results``.
        game_data: Game dict; ``game_data["phases"]`` is iterated and each
            phase's ``order_results`` is read as
            ``{power: {order_type: [{"result": ...}, ...]}}``. Missing or
            null ``phases`` / ``order_results`` / power entries / order
            lists are treated as "no orders" instead of raising.

    Returns:
        The counter dict described above, accumulated across all phases.
    """
    metrics = ("total", "success", "bounce", "void", "invalid")

    # Normalised result label -> counter suffix. A result that is not listed
    # here still increments "<stem>_total" but no outcome counter.
    result_bucket = {
        "success": "success",
        "bounce": "bounce",
        "invalid": "invalid",
        "void": "void",
        "void: no effect": "void",
        "": "void",
    }

    def column_stem(order_type: str) -> str:
        # "move" -> "orders_moves"; a type already ending in "s" is kept as is.
        plural = order_type if order_type.endswith("s") else f"{order_type}s"
        return f"orders_{plural}"

    # Pre-seed every column so each row has the same keys in the same order.
    totals: dict[str, float | int] = {}
    for ot in self.ORDER_TYPES:
        stem = column_stem(ot)
        for metric in metrics:
            totals[f"{stem}_{metric}"] = 0
        totals[f"{stem}_success_rate"] = 0.0

    for phase in game_data.get("phases") or ():
        # `or {}` also covers keys that exist but hold None (e.g. JSON null).
        orders_by_type = (phase.get("order_results") or {}).get(power) or {}

        for raw_type, order_list in orders_by_type.items():
            otype = raw_type.lower()
            if otype not in self.ORDER_TYPES:
                # Unknown order types are pooled under the "other" columns.
                otype = "other"
            stem = column_stem(otype)

            for entry in order_list or ():
                result = str(entry.get("result", "")).lower().strip()
                totals[f"{stem}_total"] += 1
                bucket = result_bucket.get(result)
                if bucket is not None:
                    totals[f"{stem}_{bucket}"] += 1

    # Derive success rates; types with no orders keep a rate of 0.0.
    for ot in self.ORDER_TYPES:
        stem = column_stem(ot)
        total = totals[f"{stem}_total"]
        totals[f"{stem}_success_rate"] = (
            totals[f"{stem}_success"] / total if total else 0.0
        )

    return totals
|
||||
|
||||
|
||||
|
||||
|
||||
def _extract_phase_features(self, llm_responses: List[dict], game_data: dict) -> List[dict]:
|
||||
"""Extract phase-level features for all powers, phases, and response types."""
|
||||
phase_features = []
|
||||
|
|
@ -294,6 +397,10 @@ class StatisticalGameAnalyzer:
|
|||
# === FAILURE ANALYSIS (HARD MODE) ===
|
||||
failure_metrics = self._analyze_failures(power, phase, response_type, llm_responses)
|
||||
features.update(failure_metrics)
|
||||
|
||||
# === ORDER-RESULT METRICS ===
|
||||
order_result_features = self._extract_order_results_features(power, phase_data)
|
||||
features.update(order_result_features)
|
||||
|
||||
|
||||
# Add response-type specific features
|
||||
|
|
@ -794,7 +901,10 @@ class StatisticalGameAnalyzer:
|
|||
if total_calls > 0:
|
||||
features['overall_failure_rate_percentage'] = (total_failures / total_calls) * 100.0
|
||||
features['overall_success_rate_percentage'] = (total_successes / total_calls) * 100.0
|
||||
|
||||
|
||||
# === ORDER TOTALS (whole game) ===
|
||||
order_totals = self._aggregate_order_results(power, game_data)
|
||||
features.update(order_totals)
|
||||
|
||||
# Helper methods
|
||||
|
||||
|
|
@ -1067,6 +1177,15 @@ class StatisticalGameAnalyzer:
|
|||
'military_units_gained_vs_prev_phase',
|
||||
'relationships'
|
||||
]
|
||||
|
||||
# ensure order columns
|
||||
for ot in self.ORDER_TYPES:
|
||||
plural = f"{ot}s" if not ot.endswith("s") else ot
|
||||
for suffix in ("total", "success", "bounce", "void", "invalid", "success_rate"):
|
||||
col = f"orders_{plural}_{suffix}"
|
||||
if col not in fieldnames:
|
||||
fieldnames.append(col)
|
||||
|
||||
|
||||
# Ensure all actual fields are included (in case we missed any)
|
||||
actual_fields = set()
|
||||
|
|
@ -1140,6 +1259,17 @@ class StatisticalGameAnalyzer:
|
|||
# === Diplobench style single scalar game score ===
|
||||
'game_score',
|
||||
]
|
||||
|
||||
# ensure order-total columns
|
||||
for ot in self.ORDER_TYPES:
|
||||
plural = f"{ot}s" if not ot.endswith("s") else ot
|
||||
base = f"orders_{plural}_total"
|
||||
for suffix in ("total", "success", "bounce", "void", "invalid", "success_rate"):
|
||||
col = f"orders_{plural}_{suffix}"
|
||||
if col not in fieldnames:
|
||||
fieldnames.append(col)
|
||||
|
||||
|
||||
|
||||
# Ensure all actual fields are included
|
||||
actual_fields = set()
|
||||
|
|
|
|||
|
|
@ -227,6 +227,8 @@ def _plot_relationships_per_game(
|
|||
|
||||
# ── NEW: discard rows with no relationship info ────────────
|
||||
game_df = game_df[game_df["rel_dict"].apply(bool)]
|
||||
# ── keep only MOVE phases; drop retreat (R) and adjustment (A) ─────
|
||||
game_df = game_df[game_df["game_phase"].str.upper().str.endswith("M")]
|
||||
if game_df.empty: # nothing left to plot
|
||||
continue
|
||||
|
||||
|
|
@ -334,9 +336,18 @@ def _plot_relationships_per_game(
|
|||
else to_rgba(base_colour, alpha=0.35)
|
||||
)
|
||||
|
||||
# ── “double” a lone point so it shows up as a short flat line ──
|
||||
finite_pts = [(x, y) for x, y in zip(data["x"], y_off) if not math.isnan(y)]
|
||||
if len(finite_pts) == 1:
|
||||
x0, y0 = finite_pts[0]
|
||||
xs = [x0 - 0.05, x0 + 0.05] # tiny horizontal spread
|
||||
ys = [y0, y0]
|
||||
else:
|
||||
xs, ys = data["x"], y_off
|
||||
|
||||
plt.plot(
|
||||
data["x"],
|
||||
y_off,
|
||||
xs,
|
||||
ys,
|
||||
label=f"{other} ({kind})",
|
||||
color=colour,
|
||||
linewidth=2,
|
||||
|
|
|
|||
|
|
@ -169,6 +169,7 @@ def run(exp_dir: Path, ctx: dict): # pylint: disable=unused-argument
|
|||
sns.set_style("whitegrid")
|
||||
plt.figure(figsize=(10, 7))
|
||||
sns.boxplot(x="Power", y="SupplyCenters", data=df, palette="pastel")
|
||||
plt.ylim(0, 18)
|
||||
plt.title("Supply-center distribution")
|
||||
plt.savefig(analysis_dir / "results_summary.png", dpi=150)
|
||||
plt.close()
|
||||
|
|
|
|||
48
lm_game.py
48
lm_game.py
|
|
@ -334,6 +334,17 @@ async def main():
|
|||
if neg_diary_tasks:
|
||||
await asyncio.gather(*neg_diary_tasks, return_exceptions=True)
|
||||
|
||||
# Diary Consolidation
|
||||
if current_short_phase.startswith("S") and current_short_phase.endswith("M"):
|
||||
consolidation_tasks = [
|
||||
run_diary_consolidation(agent, game, llm_log_file_path,
|
||||
prompts_dir=agent.prompts_dir)
|
||||
for agent in agents.values()
|
||||
if not game.powers[agent.power_name].is_eliminated()
|
||||
]
|
||||
if consolidation_tasks:
|
||||
await asyncio.gather(*consolidation_tasks, return_exceptions=True)
|
||||
|
||||
# --- 4c. Order Generation ---
|
||||
logger.info("Getting orders from agents...")
|
||||
board_state = game.get_state()
|
||||
|
|
@ -350,7 +361,7 @@ async def main():
|
|||
game, agent.client, board_state, power_name, possible_orders,
|
||||
game_history, model_error_stats,
|
||||
agent_goals=agent.goals, agent_relationships=agent.relationships,
|
||||
agent_private_diary_str=agent.format_private_diary_for_prompt(),
|
||||
agent_private_diary_str=agent.get_latest_phase_diary_entries(), # only include latest phase in orders prompt
|
||||
log_file_path=llm_log_file_path, phase=current_phase,
|
||||
)
|
||||
)
|
||||
|
|
@ -378,10 +389,11 @@ async def main():
|
|||
submitted_orders_this_phase[p_name] = valid + invalid
|
||||
|
||||
# diary entry only for the orders we tried to submit
|
||||
if valid or invalid:
|
||||
await agents[p_name].generate_order_diary_entry(
|
||||
game, valid + invalid, llm_log_file_path
|
||||
)
|
||||
if False: # disabled for now
|
||||
if valid or invalid:
|
||||
await agents[p_name].generate_order_diary_entry(
|
||||
game, valid + invalid, llm_log_file_path
|
||||
)
|
||||
|
||||
# --- 4d. Process Phase ---
|
||||
completed_phase = current_phase
|
||||
|
|
@ -414,26 +426,18 @@ async def main():
|
|||
all_orders_this_phase = game.order_history.get(current_short_phase, {})
|
||||
|
||||
# Phase Result Diary Entries
|
||||
phase_result_diary_tasks = [
|
||||
agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path)
|
||||
for agent in agents.values() if not game.powers[agent.power_name].is_eliminated()
|
||||
]
|
||||
if phase_result_diary_tasks:
|
||||
await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True)
|
||||
|
||||
# Diary Consolidation
|
||||
if current_short_phase.startswith("S") and current_short_phase.endswith("M"):
|
||||
consolidation_tasks = [
|
||||
run_diary_consolidation(agent, game, llm_log_file_path,
|
||||
prompts_dir=agent.prompts_dir)
|
||||
for agent in agents.values()
|
||||
if not game.powers[agent.power_name].is_eliminated()
|
||||
if current_short_phase.endswith("M"):
|
||||
phase_result_diary_tasks = [
|
||||
agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path, current_short_phase)
|
||||
for agent in agents.values() if not game.powers[agent.power_name].is_eliminated()
|
||||
]
|
||||
if consolidation_tasks:
|
||||
await asyncio.gather(*consolidation_tasks, return_exceptions=True)
|
||||
if phase_result_diary_tasks:
|
||||
await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True)
|
||||
|
||||
|
||||
|
||||
# Agent State Updates
|
||||
if current_short_phase.endswith("M"):
|
||||
if current_short_phase.endswith("M") and run_config.num_negotiation_rounds == 0: # r'ships are updated in negotiation round. otherwise in no press, updated in a separate step.
|
||||
current_board_state = game.get_state()
|
||||
state_update_tasks = [
|
||||
agent.analyze_phase_and_update_state(game, current_board_state, phase_summary, game_history, llm_log_file_path)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue