mirror of
https://github.com/GoodStartLabs/AI_Diplomacy.git
synced 2026-04-19 12:58:09 +00:00
Moved all the files that don't need to be at top level to the experiments folder. Started using uv to init the project, which should make the install easier.
158 lines
7.1 KiB
Python
158 lines
7.1 KiB
Python
import pandas as pd
|
|
import json
|
|
import os
|
|
import argparse
|
|
import traceback
|
|
|
|
def parse_success_value(value):
    """Normalize a raw ``success`` cell into a JSON-friendly value.

    Args:
        value: The raw cell value, e.g. a string, a boolean, or a missing
            marker such as NaN.

    Returns:
        ``True`` or ``False`` when ``value`` is a boolean or a string spelling
        "true"/"false" (case-insensitive, surrounding whitespace ignored);
        ``None`` when ``value`` is a missing scalar (None/NaN/NA); otherwise
        ``value`` unchanged.
    """
    if isinstance(value, str):
        # Compare on a normalized copy so "TRUE" and " false " are recognized,
        # while any other string is returned exactly as it was given.
        normalized = value.strip().lower()
        if normalized == 'true':
            return True
        if normalized == 'false':
            return False
        return value

    if pd.api.types.is_bool(value):
        # Collapse numpy booleans to the builtin bool, which json can serialize.
        return bool(value)

    # pd.isna() returns an array for list-likes, whose truth value is
    # ambiguous, so only scalars are checked for being missing.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None

    return value
|
|
|
|
def _none_if_missing(value):
    """Return None for a missing cell (None/NaN/NA); otherwise return value unchanged."""
    return None if pd.isna(value) else value


def _parse_llm_response(raw_response):
    """Decode raw_response as JSON when it looks like an object or array.

    Strings that do not start/end with JSON brackets, or that fail to decode,
    are returned unchanged. Missing cells (NaN) become None.
    """
    if not isinstance(raw_response, str):
        return _none_if_missing(raw_response)

    stripped = raw_response.strip()
    if stripped.startswith(('{', '[')) and stripped.endswith(('}', ']')):
        try:
            return json.loads(raw_response)
        except json.JSONDecodeError:
            # Looked like JSON but is not; keep the raw text.
            return raw_response
    return raw_response


def convert_csv_to_rl_json(csv_file_path, output_json_path, game_id):
    """
    Converts a CSV file of LLM responses into a JSON format suitable for RL fine-tuning.

    Each CSV row becomes one JSON object with the keys ``game_id``, ``model``,
    ``power``, ``phase``, ``response_type``, ``prompt`` (from the ``raw_input``
    column), ``llm_response`` (from ``raw_response``, decoded when it is JSON)
    and ``success``. Empty cells are written as ``null``.

    Args:
        csv_file_path (str): Path to the input CSV file.
        output_json_path (str): Path for the output JSON file. Its parent
            directory is created if needed; a bare filename is written to the
            current working directory.
        game_id (str): The game identifier for this conversion.

    Returns:
        bool: True if conversion was successful, False otherwise. Errors are
        printed rather than raised.
    """
    try:
        print(f" Attempting to read CSV: {csv_file_path}")
        if not os.path.exists(csv_file_path):
            print(f" Error: CSV file not found at {csv_file_path}")
            return False

        df = pd.read_csv(csv_file_path)

        rl_data = []
        for _, row in df.iterrows():
            rl_data.append({
                "game_id": game_id,
                # NaN from empty cells would be dumped as the non-standard JSON
                # token ``NaN``; map it to None so the output has ``null``.
                "model": _none_if_missing(row.get('model')),
                "power": _none_if_missing(row.get('power')),
                "phase": _none_if_missing(row.get('phase')),
                "response_type": _none_if_missing(row.get('response_type')),
                "prompt": _none_if_missing(row.get('raw_input')),
                "llm_response": _parse_llm_response(row.get('raw_response')),
                "success": parse_success_value(row.get('success')),
            })

        # Ensure output directory exists. dirname is '' for a bare filename,
        # and os.makedirs('') raises FileNotFoundError, so skip it in that case.
        output_dir = os.path.dirname(output_json_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        print(f" Writing JSON output to: {output_json_path}")
        with open(output_json_path, 'w', encoding='utf-8') as f:
            json.dump(rl_data, f, indent=4)

        print(f" Successfully converted CSV to JSON for game_id '{game_id}': {output_json_path}")
        return True

    except FileNotFoundError:
        print(f" Error: The file {csv_file_path} was not found during conversion for game_id '{game_id}'.")
        return False
    except pd.errors.EmptyDataError:
        print(f" Error: The file {csv_file_path} is empty for game_id '{game_id}'.")
        return False
    except Exception as e:
        # Top-level boundary for one game: report and signal failure so a
        # batch run can continue with the next game.
        print(f" An unexpected error occurred during conversion for game_id '{game_id}': {e}")
        traceback.print_exc()
        return False
|
|
|
|
# Command-line entry point: convert either one CSV (--input_csv) or every
# '*FULL_GAME' subdirectory's llm_responses.csv under a root (--scan_dir).
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Convert LLM responses CSV to RL-ready JSON. '
                    'Operates in one of two modes: single CSV file conversion or batch directory scan.'
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--input_csv', type=str,
                       help='Path to a single input CSV file. Output JSON is saved in the same directory.')
    group.add_argument('--scan_dir', type=str,
                       help='Path to a root directory (e.g., results/) to scan for subdirectories ending in \'FULL_GAME\'. '
                            'Output JSONs are saved in a \'<scan_dir>/json/\' subdirectory.')

    args = parser.parse_args()

    if args.input_csv:
        input_csv_path_arg = os.path.abspath(args.input_csv)
        if not os.path.exists(input_csv_path_arg):
            print(f"Error: Input CSV file does not exist at {input_csv_path_arg}")
            # SystemExit is raised directly: the exit() builtin is injected by
            # the site module for interactive use and may be absent.
            raise SystemExit(1)

        # For single file mode, game_id is the name of the parent directory of the CSV
        # This matches the original behavior for llm_responses.csv inside a game-specific folder
        input_dir = os.path.dirname(input_csv_path_arg)
        game_id_derived = os.path.basename(input_dir)

        # Output JSON in the same directory as the input CSV
        output_filename = os.path.splitext(os.path.basename(input_csv_path_arg))[0] + "_rl.json"
        output_json_file_path = os.path.join(input_dir, output_filename)

        print(f"Starting single file conversion for: {input_csv_path_arg}")
        print(f" Game ID (derived from parent folder): {game_id_derived}")
        print(f" Outputting to: {output_json_file_path}")
        success = convert_csv_to_rl_json(input_csv_path_arg, output_json_file_path, game_id_derived)
        if success:
            print("Single file conversion successful.")
        else:
            print("Single file conversion failed.")
            # Report the failure through the exit status so calling scripts
            # can detect it.
            raise SystemExit(1)

    elif args.scan_dir:
        scan_directory_arg = os.path.abspath(args.scan_dir)
        if not os.path.isdir(scan_directory_arg):
            print(f"Error: Scan directory does not exist or is not a directory: {scan_directory_arg}")
            raise SystemExit(1)

        output_base_dir = os.path.join(scan_directory_arg, "json")
        os.makedirs(output_base_dir, exist_ok=True)

        print(f"Starting batch conversion. Scanning directory: {scan_directory_arg}")
        print(f"Outputting all JSON files to: {output_base_dir}")

        processed_games_count = 0
        found_target_dirs = 0

        # os.listdir() returns entries in arbitrary order; sort so runs and
        # their logs are reproducible.
        for item_name in sorted(os.listdir(scan_directory_arg)):
            item_path = os.path.join(scan_directory_arg, item_name)
            if not (os.path.isdir(item_path) and item_name.endswith("FULL_GAME")):
                continue

            found_target_dirs += 1
            current_game_id = item_name
            csv_to_process = os.path.join(item_path, "llm_responses.csv")
            output_json_for_game = os.path.join(output_base_dir, f"{current_game_id}_rl.json")

            if os.path.exists(csv_to_process):
                print(f"Processing game directory: {item_name}")
                print(f" Input CSV: {csv_to_process}")
                if convert_csv_to_rl_json(csv_to_process, output_json_for_game, current_game_id):
                    processed_games_count += 1
            else:
                print(f"Warning: 'llm_responses.csv' not found in directory {item_path}. Skipping this directory.")

        if found_target_dirs == 0:
            print(f"No subdirectories ending with 'FULL_GAME' found in {scan_directory_arg}.")
        elif processed_games_count == 0:
            print(f"Found {found_target_dirs} director(y/ies) ending with 'FULL_GAME', but none contained 'llm_responses.csv' or failed processing.")
        else:
            print(f"Batch conversion completed. Successfully processed {processed_games_count} out of {found_target_dirs} found 'FULL_GAME' director(y/ies).")
|