from dotenv import load_dotenv
import logging
import os
from typing import Dict, List, Tuple, Set, Optional
from diplomacy import Game
import csv
from typing import TYPE_CHECKING
import random
import string
import json

# Avoid circular import for type hinting
if TYPE_CHECKING:
    from .clients import BaseModelClient
    # If DiplomacyAgent is used for type hinting for an 'agent' parameter:
    # from .agent import DiplomacyAgent

logger = logging.getLogger("utils")
logger.setLevel(logging.INFO)
logging.basicConfig(level=logging.INFO)

load_dotenv()


def atomic_write_json(data: dict, filepath: str):
    """Writes a dictionary to a JSON file atomically.

    The data is first serialized to a temp file in the destination
    directory, then moved into place with ``os.replace`` so readers never
    observe a half-written file. Errors are logged, not raised.
    """
    # BUGFIX: must exist before the try — if makedirs raises, the except
    # block below would otherwise hit a NameError referencing this name.
    temp_filepath = None
    try:
        # Ensure the destination directory exists.
        dir_name = os.path.dirname(filepath)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)

        # Write to a temporary file in the same directory (same filesystem,
        # so the final move is atomic). PID suffix avoids clashes between
        # concurrent processes.
        temp_filepath = f"{filepath}.tmp.{os.getpid()}"
        with open(temp_filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

        # BUGFIX: os.replace (not os.rename) atomically overwrites an
        # existing destination on every platform, including Windows.
        os.replace(temp_filepath, filepath)
    except Exception as e:
        logger.error(f"Failed to perform atomic write to {filepath}: {e}", exc_info=True)
        # Best-effort cleanup of the temp file if it was created.
        if temp_filepath and os.path.exists(temp_filepath):
            try:
                os.remove(temp_filepath)
            except Exception as e_clean:
                logger.error(f"Failed to clean up temp file {temp_filepath}: {e_clean}")


def assign_models_to_powers() -> Dict[str, str]:
    """
    Example usage: define which model each power uses.
    Return a dict: { power_name: model_id, ... }

    POWERS = ['AUSTRIA', 'ENGLAND', 'FRANCE', 'GERMANY', 'ITALY', 'RUSSIA', 'TURKEY']

    Models supported: o3-mini, o4-mini, o3, gpt-4o, gpt-4o-mini,
    claude-opus-4-20250514, claude-sonnet-4-20250514, claude-3-5-haiku-20241022,
    claude-3-5-sonnet-20241022, claude-3-7-sonnet-20250219,
    gemini-2.0-flash, gemini-2.5-flash-preview-04-17, gemini-2.5-pro-preview-03-25,
    deepseek-chat, deepseek-reasoner,
    openrouter-meta-llama/llama-3.3-70b-instruct, openrouter-qwen/qwen3-235b-a22b,
    openrouter-microsoft/phi-4-reasoning-plus:free,
    openrouter-deepseek/deepseek-prover-v2:free,
    openrouter-meta-llama/llama-4-maverick:free,
    openrouter-nvidia/llama-3.3-nemotron-super-49b-v1:free,
    openrouter-google/gemma-3-12b-it:free,
    openrouter-google/gemini-2.5-flash-preview-05-20
    """
    # POWER MODELS — disabled roster; re-enable by returning this mapping.
    # (Was previously "commented out" as a bare triple-quoted string literal,
    # which is dead code; kept here as real comments instead.)
    # return {
    #     "AUSTRIA": "openrouter-google/gemini-2.5-flash-preview-05-20",
    #     "ENGLAND": "openrouter-moonshotai/kimi-dev-72b:free",
    #     "FRANCE": "together-arcee-ai/AFM-4.5B-Preview",
    #     "GERMANY": "openrouter-google/gemini-2.5-flash-lite-preview-06-17",
    #     "ITALY": "together-lgai/exaone-deep-32b",
    #     "RUSSIA": "deepseek-reasoner",
    #     "TURKEY": "openrouter-cohere/command-a",
    # }

    # TEST MODELS — every power on the same cheap model for smoke tests.
    return {
        "AUSTRIA": "openrouter-mistralai/mistral-small-3.2-24b-instruct",
        "ENGLAND": "openrouter-mistralai/mistral-small-3.2-24b-instruct",
        "FRANCE": "openrouter-mistralai/mistral-small-3.2-24b-instruct",
        "GERMANY": "openrouter-mistralai/mistral-small-3.2-24b-instruct",
        "ITALY": "openrouter-mistralai/mistral-small-3.2-24b-instruct",
        "RUSSIA": "openrouter-mistralai/mistral-small-3.2-24b-instruct",
        "TURKEY": "openrouter-mistralai/mistral-small-3.2-24b-instruct",
    }


def gather_possible_orders(game: Game, power_name: str) -> Dict[str, List[str]]:
    """
    Returns a dictionary mapping each orderable location of *power_name*
    to the list of valid orders at that location (empty list if the engine
    reports none for it).
    """
    orderable_locs = game.get_orderable_locations(power_name)
    all_possible = game.get_all_possible_orders()
    result = {}
    for loc in orderable_locs:
        result[loc] = all_possible.get(loc, [])
    return result


async def get_valid_orders(
    game: Game,
    client,  # BaseModelClient instance
    board_state,
    power_name: str,
    possible_orders: Dict[str, List[str]],
    game_history,
    model_error_stats,
    agent_goals=None,
    agent_relationships=None,
    agent_private_diary_str=None,
    log_file_path: str = None,
    phase: str = None,
) -> Dict[str, List[str]]:
    """
    Generates orders with the LLM, validates them by round-tripping through
    the engine, and returns **both** the accepted and rejected orders so the
    caller can record invalid attempts.

    Returns
    -------
    dict : { "valid": [...], "invalid": [...] }
    """
    # ── 1. Ask the model ───────────────────────────────────────
    raw_orders = await client.get_orders(
        game=game,
        board_state=board_state,
        power_name=power_name,
        possible_orders=possible_orders,
        conversation_text=game_history,
        model_error_stats=model_error_stats,
        agent_goals=agent_goals,
        agent_relationships=agent_relationships,
        agent_private_diary_str=agent_private_diary_str,
        log_file_path=log_file_path,
        phase=phase,
    )

    invalid_info: list[str] = []
    valid: list[str] = []
    invalid: list[str] = []

    # ── 2. Type check ──────────────────────────────────────────
    if not isinstance(raw_orders, list):
        logger.warning("[%s] Orders received from LLM are not a list: %s. Using fallback.", power_name, raw_orders)
        model_error_stats[client.model_name]["order_decoding_errors"] += 1
        return {"valid": client.fallback_orders(possible_orders), "invalid": []}

    # ── 3. Round-trip validation with engine ───────────────────
    # Callers may pass either the engine power name or a 3-letter code.
    CODE_TO_ENGINE = {
        "AUT": "AUSTRIA",
        "ENG": "ENGLAND",
        "FRA": "FRANCE",
        "GER": "GERMANY",
        "ITA": "ITALY",
        "RUS": "RUSSIA",
        "TUR": "TURKEY",
    }
    engine_power = power_name if power_name in game.powers else CODE_TO_ENGINE[power_name]

    for move in raw_orders:
        if not move or not move.strip():
            continue
        upper = move.upper()

        # WAIVE is always valid
        if upper == "WAIVE":
            valid.append("WAIVE")
            continue

        # Set the single order and read it back: the engine normalizes
        # orders it accepts and drops ones it rejects.
        game.clear_orders(engine_power)
        game.set_orders(engine_power, [upper])
        normed = game.get_orders(engine_power)

        if normed:  # accepted
            valid.append(normed[0])
        else:  # rejected
            invalid.append(upper)
            invalid_info.append(f"Order '{move}' is invalid for {power_name}")

    game.clear_orders(engine_power)  # clean slate for main engine flow

    # ── 4. Legacy logging & stats updates ──────────────────────
    if invalid_info:  # at least one bad move
        logger.debug("[%s] Invalid orders: %s", power_name, ", ".join(invalid_info))
        model_error_stats[client.model_name]["order_decoding_errors"] += 1
        logger.debug("[%s] Some orders invalid, using fallback.", power_name)
    else:
        logger.debug("[%s] All orders valid: %s", power_name, valid)

    # ── 5. Fallback when nothing survives ─────────────────────
    if not valid:
        fallback = client.fallback_orders(possible_orders)
        return {"valid": fallback, "invalid": invalid}

    return {"valid": valid, "invalid": invalid}


def normalize_and_compare_orders(
    issued_orders: Dict[str, List[str]],
    accepted_orders_dict: Dict[str, List[str]],
    game: Game,
) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]]]:
    """
    Normalizes and compares issued orders against accepted orders from the game engine.
    Uses the map's built-in normalization methods to ensure consistent formatting.

    Args:
        issued_orders: Dictionary of orders issued by power {power_name: [orders]}
        accepted_orders_dict: Dictionary of orders accepted by the engine, typically
                              from game.get_state()["orders"].
        game: The current Game object containing the map.

    Returns:
        Tuple[Dict[str, Set[str]], Dict[str, Set[str]]]:
            (orders_not_accepted, orders_not_issued)
            - orders_not_accepted: Orders issued but not accepted by engine (normalized).
            - orders_not_issued: Orders accepted by engine but not issued (normalized).
    """
    game_map = game.map

    def normalize_order(order: str) -> str:
        # Normalize a single order string using the game map; fall back to
        # the raw string if the map cannot normalize it.
        # NOTE: game_map.norm handles whole-order normalization; per-part
        # splitting of complex (support/convoy) orders could be added here
        # if it ever proves insufficient.
        if not order:
            return order
        try:
            return game_map.norm(order)
        except Exception as e:
            logger.warning(f"Could not normalize order '{order}': {e}")
            return order  # Return original if normalization fails

    orders_not_accepted = {}
    orders_not_issued = {}
    all_powers = set(issued_orders.keys()) | set(accepted_orders_dict.keys())

    for pwr in all_powers:
        # Normalize issued orders for the power, handling potential absence
        issued_set = set()
        if pwr in issued_orders:
            try:
                issued_set = {normalize_order(o) for o in issued_orders.get(pwr, []) if o}
            except Exception as e:
                logger.error(f"Error normalizing issued orders for {pwr}: {e}")

        # Normalize accepted orders for the power, handling potential absence
        accepted_set = set()
        if pwr in accepted_orders_dict:
            try:
                accepted_set = {normalize_order(o) for o in accepted_orders_dict.get(pwr, []) if o}
            except Exception as e:
                logger.error(f"Error normalizing accepted orders for {pwr}: {e}")

        # Compare the sets
        missing_from_engine = issued_set - accepted_set
        missing_from_issued = accepted_set - issued_set

        if missing_from_engine:
            orders_not_accepted[pwr] = missing_from_engine
        if missing_from_issued:
            orders_not_issued[pwr] = missing_from_issued

    return orders_not_accepted, orders_not_issued


# Helper to load prompt text from file relative to the expected 'prompts' dir
def load_prompt(filename: str, prompts_dir: Optional[str] = None) -> str:
    """
    Return the contents of *filename* while never joining paths twice.

    Resolution rules
    ----------------
    1. If *filename* is absolute → use it directly.
    2. Elif *filename* already contains a path component (e.g. 'x/y.txt')
       → join it under *prompts_dir* if provided, otherwise under the
       package's default prompts dir.  (Docstring fixed to match the code:
       such paths were never "used directly".)
    3. Elif *prompts_dir* is provided → join prompts_dir + filename.
    4. Otherwise → join the package's default prompts dir.

    Returns the stripped file contents, or "" if the file is missing.
    """
    if os.path.isabs(filename):  # rule 1
        prompt_path = filename
    elif os.path.dirname(filename):  # rule 2 (has slash)
        # If it's a relative path with directory, join with prompts_dir if provided
        if prompts_dir:
            prompt_path = os.path.join(prompts_dir, filename)
        else:
            default_dir = os.path.join(os.path.dirname(__file__), "prompts")
            prompt_path = os.path.join(default_dir, filename)
    elif prompts_dir:  # rule 3
        prompt_path = os.path.join(prompts_dir, filename)
    else:  # rule 4
        default_dir = os.path.join(os.path.dirname(__file__), "prompts")
        prompt_path = os.path.join(default_dir, filename)

    try:
        with open(prompt_path, "r", encoding="utf-8") as fh:
            return fh.read().strip()
    except FileNotFoundError:
        logger.error(f"Prompt file not found: {prompt_path}")
        return ""


# == New LLM Response Logging Function ==
def log_llm_response(
    log_file_path: str,
    model_name: str,
    power_name: Optional[str],  # Optional for non-power-specific calls like summary
    phase: str,
    response_type: str,
    raw_input_prompt: str,  # Added new parameter for the raw input
    raw_response: str,
    success: str,  # Changed from bool to str
):
    """Appends a raw LLM response to a CSV log file.

    Creates the parent directory and writes a header row on first use.
    All fields are quoted so embedded commas/newlines survive. Errors are
    logged, never raised.
    """
    try:
        # Ensure the directory exists
        log_dir = os.path.dirname(log_file_path)
        if log_dir:  # Ensure log_dir is not empty (e.g., if path is just a filename)
            os.makedirs(log_dir, exist_ok=True)

        # Check if file exists and has content to determine if we need headers
        file_exists = os.path.isfile(log_file_path) and os.path.getsize(log_file_path) > 0

        with open(log_file_path, "a", newline="", encoding="utf-8") as csvfile:
            # Added "raw_input" to fieldnames
            fieldnames = ["model", "power", "phase", "response_type", "raw_input", "raw_response", "success"]
            writer = csv.DictWriter(
                csvfile,
                fieldnames=fieldnames,
                quoting=csv.QUOTE_ALL,  # Quote all fields to handle commas and newlines
                escapechar='\\',  # Use backslash for escaping
            )

            if not file_exists:
                writer.writeheader()  # Write header only if file is new

            writer.writerow({
                "model": model_name,
                "power": power_name if power_name else "game",  # Use 'game' if no specific power
                "phase": phase,
                "response_type": response_type,
                "raw_input": raw_input_prompt,  # Added raw_input to the row
                "raw_response": raw_response,
                "success": success,
            })
    except Exception as e:
        logger.error(f"Failed to log LLM response to {log_file_path}: {e}", exc_info=True)


# == New Async LLM Wrapper with Logging ==
async def run_llm_and_log(
    client: 'BaseModelClient',
    prompt: str,
    log_file_path: str,  # Kept for context, but not used for logging here
    power_name: Optional[str],  # Kept for context, but not used for logging here
    phase: str,  # Kept for context, but not used for logging here
    response_type: str,  # Kept for context, but not used for logging here
    temperature: float = 0.0,
) -> str:
    """Calls the client's generate_response and returns the raw output.

    Logging is handled by the caller; an empty string signals failure.
    """
    raw_response = ""  # Initialize in case of error
    try:
        raw_response = await client.generate_response(prompt, temperature=temperature)
    except Exception as e:
        # Log the API call error. The caller will decide how to log this
        # in llm_responses.csv
        logger.error(
            f"API Error during LLM call for {client.model_name}/{power_name}/{response_type} in phase {phase}: {e}",
            exc_info=True,
        )
        # raw_response remains "" indicating failure to the caller
    return raw_response


# This generates a few lines of random alphanum chars to inject into the
# system prompt. This lets us use temp=0 while still getting variation
# between trials.
# Temp=0 is important for better performance on deciding moves, and to
# ensure valid json outputs.
def generate_random_seed(n_lines: int = 5, n_chars_per_line: int = 80) -> str:
    """Return *n_lines* lines of *n_chars_per_line* random alphanumeric
    characters, framed by leading and trailing newlines."""
    seed_lines = [
        ''.join(random.choices(string.ascii_letters + string.digits, k=n_chars_per_line))
        for _ in range(n_lines)
    ]
    random_seed_block = "\n" + "\n".join(seed_lines) + "\n"
    return random_seed_block