#!/usr/bin/env python3 """ Orchestration script to run all three analysis scripts in sequence: 1. 1_make_longform_orders_data.py 2. 2_make_convo_data.py 3. 3_make_phase_data.py This script passes the same CLI arguments to each script. eg # Basic usage with all parameters python analysis/make_all_analysis_data.py --selected_game game1 game2 --game_data_folder "/path/to/Game Data" --output_folder "/path/to/Game Data - Analysis" --analysis_folder "/path/to/analysis" # Using output_folder as analysis_folder python analysis/make_all_analysis_data.py --selected_game game1 --game_data_folder "/path/to/Game Data" --output_folder "/path/to/Game Data - Analysis" # or leave out to process all games in the data folder python analysis/make_all_analysis_data.py --game_data_folder "/path/to/Game Data" --output_folder "/path/to/Game Data - Analysis" """ import argparse import os from pathlib import Path import pandas as pd from tqdm import tqdm from analysis.p1_make_longform_orders_data import make_longform_order_data from analysis.p2_make_convo_data import make_conversation_data from analysis.p3_make_phase_data import make_phase_data from analysis.analysis_helpers import process_standard_game_inputs, process_game_in_zip from typing import Dict def process_game_data_from_folders(game_name : str, game_path : Path) -> Dict[str, pd.DataFrame]: """Reads log data from folder and makes analytic data sets""" game_data : dict[str, pd.DataFrame] = process_standard_game_inputs(game_data_folder=game_path, selected_game=game_name) orders_data : pd.DataFrame = make_longform_order_data(overview=game_data["overview"], lmvs_data=game_data["lmvs_data"], all_responses=game_data["all_responses"]) conversations_data : pd.DataFrame = make_conversation_data(overview=game_data["overview"], lmvs_data=game_data["lmvs_data"]) phase_data : pd.DataFrame = make_phase_data(overview=game_data["overview"], lmvs_data=game_data["lmvs_data"], conversations_data=conversations_data, orders_data=orders_data) return {"orders_data": orders_data, "conversations_data": conversations_data, "phase_data": phase_data} def process_game_data_from_zip(zip_path : Path, game_name : str) -> Dict[str, pd.DataFrame]: """Reads log data from zip and makes analytic data sets""" game_data : dict[str, pd.DataFrame] = process_game_in_zip(zip_path=zip_path, selected_game=game_name) orders_data : pd.DataFrame = make_longform_order_data(overview=game_data["overview"], lmvs_data=game_data["lmvs_data"], all_responses=game_data["all_responses"]) conversations_data : pd.DataFrame = make_conversation_data(overview=game_data["overview"], lmvs_data=game_data["lmvs_data"]) phase_data : pd.DataFrame = make_phase_data(overview=game_data["overview"], lmvs_data=game_data["lmvs_data"], conversations_data=conversations_data, orders_data=orders_data) return {"orders_data": orders_data, "conversations_data": conversations_data, "phase_data": phase_data} if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run all three analysis scripts in sequence with the same arguments.") parser.add_argument( "--selected_game", type=str, nargs='*', help="One or more specific games to process. If not provided, all games in the data folder will be processed." ) parser.add_argument( "--game_data_folder", type=str, required=True, help="The folder where game data is stored." ) parser.add_argument( "--analysis_folder", type=str, required=True, help="The folder to save the new analysis folders and files" ) args = parser.parse_args() # Convert namespace to dictionary args_dict = vars(args) args_dict["analysis_folder"] = Path(args_dict["analysis_folder"]) args_dict["game_data_folder"] = Path(args_dict["game_data_folder"]) games_to_process = args.selected_game if not games_to_process: games_to_process = os.listdir(args_dict["game_data_folder"]) for game in tqdm(games_to_process, desc="Processing games"): game_path = args_dict["game_data_folder"] / game if not game_path.is_dir(): continue try: results = process_game_data_from_folders(game_name=game, game_path=args_dict["game_data_folder"]) for data_set, df in results.items(): output_path = args_dict["analysis_folder"] / data_set / f"{game}_{data_set}.csv" df.to_csv(output_path, index=False) except Exception as e: print(f"Error processing game {game}: {e}")