AI_Diplomacy/analysis/make_all_analysis_data.py
2025-07-15 22:43:25 -04:00

113 lines
No EOL
4.9 KiB
Python

#!/usr/bin/env python3
"""
Orchestration script to run all three analysis steps in sequence for each game:
1. p1_make_longform_orders_data.py (make_longform_order_data)
2. p2_make_convo_data.py (make_conversation_data)
3. p3_make_phase_data.py (make_phase_data)
This script loads each game's data once and passes the same inputs to each step.
Each resulting data set is written to <analysis_folder>/<data_set>/<game>_<data_set>.csv.
e.g.
# Basic usage with all parameters
python analysis/make_all_analysis_data.py --selected_game game1 game2 --game_data_folder "/path/to/Game Data" --analysis_folder "/path/to/Game Data - Analysis"
# Processing a single game
python analysis/make_all_analysis_data.py --selected_game game1 --game_data_folder "/path/to/Game Data" --analysis_folder "/path/to/Game Data - Analysis"
# or leave out --selected_game to process all games in the data folder
python analysis/make_all_analysis_data.py --game_data_folder "/path/to/Game Data" --analysis_folder "/path/to/Game Data - Analysis"
"""
import argparse
import os
from pathlib import Path
import pandas as pd
from tqdm import tqdm
from analysis.p1_make_longform_orders_data import make_longform_order_data
from analysis.p2_make_convo_data import make_conversation_data
from analysis.p3_make_phase_data import make_phase_data
from analysis.analysis_helpers import process_standard_game_inputs, process_game_in_zip
from typing import Dict
def process_game_data_from_folders(game_name : str, game_path : Path) -> Dict[str, pd.DataFrame]:
    """Build the orders, conversations and phase data sets for a game stored in folders.

    Args:
        game_name: Forwarded as ``selected_game`` to ``process_standard_game_inputs``.
        game_path: Forwarded as ``game_data_folder`` to ``process_standard_game_inputs``.

    Returns:
        A dict with the keys ``"orders_data"``, ``"conversations_data"`` and
        ``"phase_data"``, each mapped to the output of the matching builder.
    """
    raw_inputs: dict[str, pd.DataFrame] = process_standard_game_inputs(
        game_data_folder=game_path,
        selected_game=game_name,
    )
    overview = raw_inputs["overview"]
    lmvs = raw_inputs["lmvs_data"]

    # Orders and conversations come first: the phase data is built from both.
    orders: pd.DataFrame = make_longform_order_data(
        overview=overview,
        lmvs_data=lmvs,
        all_responses=raw_inputs["all_responses"],
    )
    conversations: pd.DataFrame = make_conversation_data(overview=overview, lmvs_data=lmvs)
    phases: pd.DataFrame = make_phase_data(
        overview=overview,
        lmvs_data=lmvs,
        conversations_data=conversations,
        orders_data=orders,
    )
    return dict(orders_data=orders, conversations_data=conversations, phase_data=phases)
def process_game_data_from_zip(zip_path : Path, game_name : str) -> Dict[str, pd.DataFrame]:
    """Build the orders, conversations and phase data sets for a game stored in a zip.

    Args:
        zip_path: Forwarded as ``zip_path`` to ``process_game_in_zip``.
        game_name: Forwarded as ``selected_game`` to ``process_game_in_zip``.

    Returns:
        A dict with the keys ``"orders_data"``, ``"conversations_data"`` and
        ``"phase_data"``, each mapped to the output of the matching builder.
    """
    raw_inputs: dict[str, pd.DataFrame] = process_game_in_zip(
        zip_path=zip_path,
        selected_game=game_name,
    )
    overview = raw_inputs["overview"]
    lmvs = raw_inputs["lmvs_data"]

    # Orders and conversations come first: the phase data is built from both.
    orders: pd.DataFrame = make_longform_order_data(
        overview=overview,
        lmvs_data=lmvs,
        all_responses=raw_inputs["all_responses"],
    )
    conversations: pd.DataFrame = make_conversation_data(overview=overview, lmvs_data=lmvs)
    phases: pd.DataFrame = make_phase_data(
        overview=overview,
        lmvs_data=lmvs,
        conversations_data=conversations,
        orders_data=orders,
    )
    return dict(orders_data=orders, conversations_data=conversations, phase_data=phases)
if __name__ == "__main__":
    # Command-line entry point: build every analysis data set for each requested
    # game and write one CSV per (game, data set) under the analysis folder.
    parser = argparse.ArgumentParser(description="Run all three analysis scripts in sequence with the same arguments.")
    parser.add_argument(
        "--selected_game",
        type=str,
        nargs='*',
        help="One or more specific games to process. If not provided, all games in the data folder will be processed."
    )
    parser.add_argument(
        "--game_data_folder",
        type=str,
        required=True,
        help="The folder where game data is stored."
    )
    parser.add_argument(
        "--analysis_folder",
        type=str,
        required=True,
        help="The folder to save the new analysis folders and files"
    )
    args = parser.parse_args()

    # Convert namespace to dictionary
    args_dict = vars(args)
    args_dict["analysis_folder"] = Path(args_dict["analysis_folder"])
    args_dict["game_data_folder"] = Path(args_dict["game_data_folder"])

    # An omitted or empty --selected_game means "every entry in the data folder".
    explicit_selection = bool(args.selected_game)
    games_to_process = args.selected_game
    if not games_to_process:
        games_to_process = os.listdir(args_dict["game_data_folder"])

    for game in tqdm(games_to_process, desc="Processing games"):
        game_path = args_dict["game_data_folder"] / game
        if not game_path.is_dir():
            # Stray files in the data folder are skipped quietly, but a game the
            # user asked for by name should not disappear without a trace.
            if explicit_selection:
                print(f"Skipping game {game}: {game_path} is not a directory")
            continue

        try:
            # The data folder (not the per-game path) is passed on, together with
            # the game name, matching what process_game_data_from_folders forwards.
            results = process_game_data_from_folders(game_name=game, game_path=args_dict["game_data_folder"])
            for data_set, df in results.items():
                output_path = args_dict["analysis_folder"] / data_set / f"{game}_{data_set}.csv"
                # to_csv does not create missing directories; without this the
                # first run against a fresh analysis folder fails on every write.
                output_path.parent.mkdir(parents=True, exist_ok=True)
                df.to_csv(output_path, index=False)
        except Exception as e:
            # Best-effort batch run: report the failure and continue with the next game.
            print(f"Error processing game {game}: {e}")