diff --git a/lm_game.py b/lm_game.py index 7c0bcb2..059884c 100644 --- a/lm_game.py +++ b/lm_game.py @@ -20,7 +20,11 @@ os.environ["GRPC_POLL_STRATEGY"] = "poll" # Use 'poll' for macOS compatibility from diplomacy import Game -from ai_diplomacy.utils import get_valid_orders, gather_possible_orders, parse_prompts_dir_arg +from ai_diplomacy.utils import ( + get_valid_orders, + gather_possible_orders, + parse_prompts_dir_arg, +) from ai_diplomacy.negotiations import conduct_negotiations from ai_diplomacy.planning import planning_phase from ai_diplomacy.game_history import GameHistory @@ -54,10 +58,12 @@ def _str2bool(v: str) -> bool: return False raise argparse.ArgumentTypeError(f"Boolean value expected, got '{v}'") + def _detect_victory(game: Game, threshold: int = 18) -> bool: """True iff any power already owns ≥ `threshold` supply centres.""" return any(len(p.centers) >= threshold for p in game.powers.values()) + def parse_arguments(): parser = argparse.ArgumentParser( description="Run a Diplomacy game simulation with configurable parameters." @@ -69,10 +75,10 @@ def parse_arguments(): help="Directory for results. If it exists, the game resumes. If not, it's created. Defaults to a new timestamped directory.", ) parser.add_argument( - "--output", # alias for back compatibility - dest="run_dir", # write to the same variable as --run_dir + "--output", # alias for back compatibility + dest="run_dir", # write to the same variable as --run_dir type=str, - help=argparse.SUPPRESS # hides it from `--help` + help=argparse.SUPPRESS, # hides it from `--help` ) parser.add_argument( "--critical_state_analysis_dir", @@ -113,7 +119,7 @@ def parse_arguments(): ), ) parser.add_argument( - "--planning_phase", + "--planning_phase", action="store_true", help="Enable the planning phase for each power to set strategic directives.", ) @@ -121,25 +127,25 @@ def parse_arguments(): "--max_tokens", type=int, default=16000, - help="Maximum number of new tokens to generate per LLM call (default: 16000)." + help="Maximum number of new tokens to generate per LLM call (default: 16000).", ) parser.add_argument( "--seed_base", type=int, default=42, - help="RNG seed placeholder for compatibility with experiment_runner. Currently unused." + help="RNG seed placeholder for compatibility with experiment_runner. Currently unused.", ) parser.add_argument( "--max_tokens_per_model", type=str, default="", - help="Comma-separated list of 7 token limits (in order: AUSTRIA, ENGLAND, FRANCE, GERMANY, ITALY, RUSSIA, TURKEY). Overrides --max_tokens." + help="Comma-separated list of 7 token limits (in order: AUSTRIA, ENGLAND, FRANCE, GERMANY, ITALY, RUSSIA, TURKEY). Overrides --max_tokens.", ) parser.add_argument( "--prompts_dir", type=str, default=None, - help="Path to the directory containing prompt files. Defaults to the packaged prompts directory." + help="Path to the directory containing prompt files. Defaults to the packaged prompts directory.", ) parser.add_argument( "--simple_prompts", @@ -193,22 +199,28 @@ async def main(): args = parse_arguments() start_whole = time.time() - logger.info(f"args.simple_prompts = {args.simple_prompts} (type: {type(args.simple_prompts)}), args.prompts_dir = {args.prompts_dir}") + logger.info( + f"args.simple_prompts = {args.simple_prompts} (type: {type(args.simple_prompts)}), args.prompts_dir = {args.prompts_dir}" + ) logger.info(f"config.SIMPLE_PROMPTS before update = {config.SIMPLE_PROMPTS}") - + # IMPORTANT: Check if user explicitly provided a prompts_dir user_provided_prompts_dir = args.prompts_dir is not None - + if args.simple_prompts: config.SIMPLE_PROMPTS = True if args.prompts_dir is None: pkg_root = os.path.join(os.path.dirname(__file__), "ai_diplomacy") args.prompts_dir = os.path.join(pkg_root, "prompts_simple") - logger.info(f"Set prompts_dir to {args.prompts_dir} because simple_prompts=True and prompts_dir was None") + logger.info( + f"Set prompts_dir to {args.prompts_dir} because simple_prompts=True and prompts_dir was None" + ) else: # User provided their own prompts_dir, but simple_prompts is True # This is likely a conflict - warn the user - logger.warning(f"Both --simple_prompts=True and --prompts_dir={args.prompts_dir} were specified. Using user-provided prompts_dir.") + logger.warning( + f"Both --simple_prompts=True and --prompts_dir={args.prompts_dir} were specified. Using user-provided prompts_dir." + ) else: logger.info(f"simple_prompts is False, using prompts_dir: {args.prompts_dir}") @@ -224,6 +236,7 @@ async def main(): # Handle phase summaries flag - import narrative module only if enabled if args.generate_phase_summaries: import ai_diplomacy.narrative + logger.info("Phase summary generation enabled") else: logger.info("Phase summary generation disabled") @@ -231,15 +244,19 @@ async def main(): # Handle unformatted prompts flag if args.use_unformatted_prompts: config.USE_UNFORMATTED_PROMPTS = True - logger.info("Using two-step approach: unformatted prompts + Gemini Flash formatting") + logger.info( + "Using two-step approach: unformatted prompts + Gemini Flash formatting" + ) else: config.USE_UNFORMATTED_PROMPTS = False logger.info("Using original single-step formatted prompts") - + # Handle country-specific prompts flag if args.country_specific_prompts: config.COUNTRY_SPECIFIC_PROMPTS = True - logger.info("Country-specific prompts enabled - powers will use their custom prompts when available") + logger.info( + "Country-specific prompts enabled - powers will use their custom prompts when available" + ) else: config.COUNTRY_SPECIFIC_PROMPTS = False logger.info("Using generic prompts for all powers") @@ -256,42 +273,49 @@ async def main(): is_resuming = False if run_dir and os.path.exists(run_dir) and not args.critical_state_analysis_dir: is_resuming = True - + if args.critical_state_analysis_dir: if not run_dir: - raise ValueError("--run_dir must be given when using --critical_state_analysis_dir") + raise ValueError( + "--run_dir must be given when using --critical_state_analysis_dir" + ) - original_run_dir = run_dir # where the live game lives - run_dir = args.critical_state_analysis_dir # where new artefacts will be written + original_run_dir = run_dir # where the live game lives + run_dir = ( + args.critical_state_analysis_dir + ) # where new artefacts will be written os.makedirs(run_dir, exist_ok=True) # copy the most-recent game snapshot so we can resume from it src = os.path.join(original_run_dir, "lmvsgame.json") - dst = os.path.join(run_dir, "lmvsgame.json") + dst = os.path.join(run_dir, "lmvsgame.json") if not os.path.exists(src): raise FileNotFoundError(f"No saved game found at {src}") if not os.path.exists(dst): shutil.copy2(src, dst) - is_resuming = True # we *are* continuing a game + is_resuming = True # we *are* continuing a game logger.info( "Critical state analysis: resuming from %s, writing new results to %s", - original_run_dir, run_dir, + original_run_dir, + run_dir, ) - if not run_dir: # Default behavior: create a new timestamped directory timestamp_str = time.strftime("%Y%m%d_%H%M%S") run_dir = f"./results/{timestamp_str}" - + os.makedirs(run_dir, exist_ok=True) logger.info(f"Using result directory: {run_dir}") # --- 2. Setup Logging and File Paths --- general_log_file_path = os.path.join(run_dir, "general_game.log") - file_handler = logging.FileHandler(general_log_file_path, mode='a') - file_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - [%(funcName)s:%(lineno)d] - %(message)s", datefmt="%Y-%m-%d %H:%M:%S") + file_handler = logging.FileHandler(general_log_file_path, mode="a") + file_formatter = logging.Formatter( + "%(asctime)s - %(levelname)s - %(name)s - [%(funcName)s:%(lineno)d] - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) file_handler.setFormatter(file_formatter) file_handler.setLevel(logging.INFO) logging.getLogger().addHandler(file_handler) @@ -300,33 +324,41 @@ async def main(): game_file_name = "lmvsgame.json" game_file_path = os.path.join(run_dir, game_file_name) llm_log_file_path = os.path.join(run_dir, "llm_responses.csv") - model_error_stats = defaultdict(lambda: {"conversation_errors": 0, "order_decoding_errors": 0}) + model_error_stats = defaultdict( + lambda: {"conversation_errors": 0, "order_decoding_errors": 0} + ) # --- 3. Initialize or Load Game State --- game: Game agents: Dict[str, DiplomacyAgent] game_history: GameHistory - run_config: Namespace = args # Default to current args + run_config: Namespace = args # Default to current args if is_resuming: try: # When resuming, we always use the provided params (they will override the params used in the saved state) - game, agents, game_history, _ = load_game_state(run_dir, game_file_name, run_config, args.resume_from_phase) + game, agents, game_history, _ = load_game_state( + run_dir, game_file_name, run_config, args.resume_from_phase + ) - logger.info(f"Successfully resumed game from phase: {game.get_current_phase()}.") + logger.info( + f"Successfully resumed game from phase: {game.get_current_phase()}." + ) except (FileNotFoundError, ValueError) as e: logger.error(f"Could not resume game: {e}. Starting a new game instead.") - is_resuming = False # Fallback to new game - + is_resuming = False # Fallback to new game + if not is_resuming: game = Game() game_history = GameHistory() if not hasattr(game, "phase_summaries"): game.phase_summaries = {} - agents = await initialize_new_game(run_config, game, game_history, llm_log_file_path) + agents = await initialize_new_game( + run_config, game, game_history, llm_log_file_path + ) if _detect_victory(game): - game.is_game_done = True # short-circuit the main loop + game.is_game_done = True # short-circuit the main loop logger.info( "Game already complete on load – a power has ≥18 centres " f"(current phase {game.get_current_phase()})." @@ -344,27 +376,42 @@ async def main(): logger.info(f"Reached max year {run_config.max_year}, stopping simulation.") break if run_config.end_at_phase and current_phase == run_config.end_at_phase: - logger.info(f"Reached end phase {run_config.end_at_phase}, stopping simulation.") + logger.info( + f"Reached end phase {run_config.end_at_phase}, stopping simulation." + ) break - logger.info(f"PHASE: {current_phase} (time so far: {time.time() - start_whole:.2f}s)") + logger.info( + f"PHASE: {current_phase} (time so far: {time.time() - start_whole:.2f}s)" + ) game_history.add_phase(current_phase) # --- 4b. Pre-Order Generation Steps (Movement Phases Only) --- if current_short_phase.endswith("M"): if run_config.num_negotiation_rounds > 0: game_history = await conduct_negotiations( - game, agents, game_history, model_error_stats, - max_rounds=run_config.num_negotiation_rounds, log_file_path=llm_log_file_path, + game, + agents, + game_history, + model_error_stats, + max_rounds=run_config.num_negotiation_rounds, + log_file_path=llm_log_file_path, ) if run_config.planning_phase: await planning_phase( - game, agents, game_history, model_error_stats, log_file_path=llm_log_file_path, + game, + agents, + game_history, + model_error_stats, + log_file_path=llm_log_file_path, ) - + neg_diary_tasks = [ - agent.generate_negotiation_diary_entry(game, game_history, llm_log_file_path) - for agent in agents.values() if not game.powers[agent.power_name].is_eliminated() + agent.generate_negotiation_diary_entry( + game, game_history, llm_log_file_path + ) + for agent in agents.values() + if not game.powers[agent.power_name].is_eliminated() ] if neg_diary_tasks: await asyncio.gather(*neg_diary_tasks, return_exceptions=True) @@ -374,14 +421,17 @@ async def main(): consolidation_future = None if current_short_phase.startswith("S") and current_short_phase.endswith("M"): consolidation_tasks = [ - run_diary_consolidation(agent, game, llm_log_file_path, - prompts_dir=agent.prompts_dir) + run_diary_consolidation( + agent, game, llm_log_file_path, prompts_dir=agent.prompts_dir + ) for agent in agents.values() if not game.powers[agent.power_name].is_eliminated() ] if consolidation_tasks: # Start consolidation tasks but don't await yet - consolidation_future = asyncio.gather(*consolidation_tasks, return_exceptions=True) + consolidation_future = asyncio.gather( + *consolidation_tasks, return_exceptions=True + ) # Order Generation (proceeds with current diary state) logger.info("Getting orders from agents...") @@ -393,35 +443,48 @@ async def main(): if not possible_orders: game.set_orders(power_name, []) continue - + order_tasks.append( get_valid_orders( - game, agent.client, board_state, power_name, possible_orders, - game_history, model_error_stats, - agent_goals=agent.goals, agent_relationships=agent.relationships, - agent_private_diary_str=agent.get_latest_phase_diary_entries(), # only include latest phase in orders prompt - log_file_path=llm_log_file_path, phase=current_phase, + game, + agent.client, + board_state, + power_name, + possible_orders, + game_history, + model_error_stats, + agent_goals=agent.goals, + agent_relationships=agent.relationships, + agent_private_diary_str=agent.get_latest_phase_diary_entries(), # only include latest phase in orders prompt + log_file_path=llm_log_file_path, + phase=current_phase, ) ) - + order_results = await asyncio.gather(*order_tasks, return_exceptions=True) - + # Ensure consolidation completes before proceeding to diary entries if consolidation_future: await consolidation_future - - active_powers = [p for p, a in agents.items() if not game.powers[p].is_eliminated()] - order_power_names = [p for p in active_powers if gather_possible_orders(game, p)] + + active_powers = [ + p for p, a in agents.items() if not game.powers[p].is_eliminated() + ] + order_power_names = [ + p for p in active_powers if gather_possible_orders(game, p) + ] submitted_orders_this_phase = defaultdict(list) for i, result in enumerate(order_results): p_name = order_power_names[i] if isinstance(result, Exception): - logger.error("Error getting orders for %s: %s", p_name, result, exc_info=result) + logger.error( + "Error getting orders for %s: %s", p_name, result, exc_info=result + ) valid, invalid = [], [] else: - valid = result.get("valid", []) + valid = result.get("valid", []) invalid = result.get("invalid", []) # what the engine will actually execute @@ -431,12 +494,12 @@ async def main(): submitted_orders_this_phase[p_name] = valid + invalid # diary entry only for the orders we tried to submit - if False: # disabled for now + if False: # disabled for now if valid or invalid: await agents[p_name].generate_order_diary_entry( game, valid + invalid, llm_log_file_path ) - + # --- 4d. Process Phase --- completed_phase = current_phase game.process() @@ -452,44 +515,69 @@ async def main(): phase_obj_in_my_history = game_history._get_phase(completed_phase) if phase_obj_in_my_history: # Store the orders the agents generated - phase_obj_in_my_history.submitted_orders_by_power = submitted_orders_this_phase + phase_obj_in_my_history.submitted_orders_by_power = ( + submitted_orders_this_phase + ) # Store the orders the engine actually accepted - phase_obj_in_my_history.orders_by_power = last_phase_from_game.orders - + phase_obj_in_my_history.orders_by_power = ( + last_phase_from_game.orders + ) + # Store the results for the accepted orders converted_results = defaultdict(list) if last_phase_from_game.results: for pwr, res_list in last_phase_from_game.results.items(): converted_results[pwr] = [[res] for res in res_list] phase_obj_in_my_history.results_by_power = converted_results - logger.debug(f"Populated submitted/accepted order and result history for phase {completed_phase}.") + logger.debug( + f"Populated submitted/accepted order and result history for phase {completed_phase}." + ) - phase_summary = game.phase_summaries.get(current_phase, "(Summary not generated)") + phase_summary = game.phase_summaries.get( + current_phase, "(Summary not generated)" + ) all_orders_this_phase = game.order_history.get(current_short_phase, {}) - + # Phase Result Diary Entries if current_short_phase.endswith("M"): phase_result_diary_tasks = [ - agent.generate_phase_result_diary_entry(game, game_history, phase_summary, all_orders_this_phase, llm_log_file_path, current_short_phase) - for agent in agents.values() if not game.powers[agent.power_name].is_eliminated() + agent.generate_phase_result_diary_entry( + game, + game_history, + phase_summary, + all_orders_this_phase, + llm_log_file_path, + current_short_phase, + ) + for agent in agents.values() + if not game.powers[agent.power_name].is_eliminated() ] if phase_result_diary_tasks: await asyncio.gather(*phase_result_diary_tasks, return_exceptions=True) - - # Agent State Updates - if current_short_phase.endswith("M") and run_config.num_negotiation_rounds == 0: # r'ships are updated in negotiation round. otherwise in no press, updated in a separate step. + if ( + current_short_phase.endswith("M") and run_config.num_negotiation_rounds == 0 + ): # r'ships are updated in negotiation round. otherwise in no press, updated in a separate step. current_board_state = game.get_state() state_update_tasks = [ - agent.analyze_phase_and_update_state(game, current_board_state, phase_summary, game_history, llm_log_file_path) - for agent in agents.values() if not game.powers[agent.power_name].is_eliminated() + agent.analyze_phase_and_update_state( + game, + current_board_state, + phase_summary, + game_history, + llm_log_file_path, + ) + for agent in agents.values() + if not game.powers[agent.power_name].is_eliminated() ] if state_update_tasks: await asyncio.gather(*state_update_tasks, return_exceptions=True) # --- 4f. Save State At End of Phase --- - await save_game_state(game, agents, game_history, game_file_path, run_config, completed_phase) + await save_game_state( + game, agents, game_history, game_file_path, run_config, completed_phase + ) logger.info(f"Phase {current_phase} took {time.time() - phase_start:.2f}s") # --- 5. Game End --- @@ -502,14 +590,16 @@ async def main(): # ---- make Namespace JSON-safe ---------------------------------- cfg = vars(run_config).copy() if "prompts_dir_map" in cfg and isinstance(cfg["prompts_dir_map"], dict): - cfg["prompts_dir_map"] = {p: str(path) for p, path in cfg["prompts_dir_map"].items()} + cfg["prompts_dir_map"] = { + p: str(path) for p, path in cfg["prompts_dir_map"].items() + } # ---------------------------------------------------------------- overview_file.write(json.dumps(model_error_stats) + "\n") - overview_file.write(json.dumps(getattr(game, 'power_model_map', {})) + "\n") + overview_file.write(json.dumps(getattr(game, "power_model_map", {})) + "\n") overview_file.write(json.dumps(cfg) + "\n") logger.info("Done.") if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main())