mirror of
https://github.com/GoodStartLabs/AI_Diplomacy.git
synced 2026-04-19 12:58:09 +00:00
158 lines
7.1 KiB
Python
158 lines
7.1 KiB
Python
import pandas as pd
|
|
import json
|
|
import os
|
|
import argparse
|
|
import traceback
|
|
|
|
def parse_success_value(value):
|
|
"""Converts success string to boolean or keeps as is if not clearly boolean."""
|
|
if isinstance(value, str):
|
|
if value.lower() == 'true':
|
|
return True
|
|
elif value.lower() == 'false':
|
|
return False
|
|
elif pd.isna(value):
|
|
return None
|
|
return value
|
|
|
|
def convert_csv_to_rl_json(csv_file_path, output_json_path, game_id):
|
|
"""
|
|
Converts a CSV file of LLM responses into a JSON format suitable for RL fine-tuning.
|
|
|
|
Args:
|
|
csv_file_path (str): The absolute path to the input CSV file.
|
|
output_json_path (str): The absolute path for the output JSON file.
|
|
game_id (str): The game identifier for this conversion.
|
|
|
|
Returns:
|
|
bool: True if conversion was successful, False otherwise.
|
|
"""
|
|
try:
|
|
print(f" Attempting to read CSV: {csv_file_path}")
|
|
if not os.path.exists(csv_file_path):
|
|
print(f" Error: CSV file not found at {csv_file_path}")
|
|
return False
|
|
|
|
df = pd.read_csv(csv_file_path)
|
|
# print(f" Successfully read CSV. Shape: {df.shape} for game_id: {game_id}")
|
|
|
|
rl_data = []
|
|
for index, row in df.iterrows():
|
|
raw_response_data = row.get('raw_response')
|
|
try:
|
|
if isinstance(raw_response_data, str) and \
|
|
raw_response_data.strip().startswith(('{', '[')) and \
|
|
raw_response_data.strip().endswith(('}', ']')):
|
|
llm_response_parsed = json.loads(raw_response_data)
|
|
else:
|
|
llm_response_parsed = raw_response_data
|
|
except json.JSONDecodeError:
|
|
llm_response_parsed = raw_response_data
|
|
|
|
entry = {
|
|
"game_id": game_id,
|
|
"model": row.get('model'),
|
|
"power": row.get('power'),
|
|
"phase": row.get('phase'),
|
|
"response_type": row.get('response_type'),
|
|
"prompt": row.get('raw_input'),
|
|
"llm_response": llm_response_parsed,
|
|
"success": parse_success_value(row.get('success'))
|
|
}
|
|
rl_data.append(entry)
|
|
|
|
# Ensure output directory exists
|
|
os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
|
|
|
|
print(f" Writing JSON output to: {output_json_path}")
|
|
with open(output_json_path, 'w') as f:
|
|
json.dump(rl_data, f, indent=4)
|
|
|
|
print(f" Successfully converted CSV to JSON for game_id '{game_id}': {output_json_path}")
|
|
return True
|
|
|
|
except FileNotFoundError:
|
|
print(f" Error: The file {csv_file_path} was not found during conversion for game_id '{game_id}'.")
|
|
return False
|
|
except pd.errors.EmptyDataError:
|
|
print(f" Error: The file {csv_file_path} is empty for game_id '{game_id}'.")
|
|
return False
|
|
except Exception as e:
|
|
print(f" An unexpected error occurred during conversion for game_id '{game_id}': {e}")
|
|
traceback.print_exc()
|
|
return False
|
|
|
|
if __name__ == '__main__':
|
|
parser = argparse.ArgumentParser(
|
|
description='Convert LLM responses CSV to RL-ready JSON. '
|
|
'Operates in one of two modes: single CSV file conversion or batch directory scan.'
|
|
)
|
|
group = parser.add_mutually_exclusive_group(required=True)
|
|
group.add_argument('--input_csv', type=str,
|
|
help='Path to a single input CSV file. Output JSON is saved in the same directory.')
|
|
group.add_argument('--scan_dir', type=str,
|
|
help='Path to a root directory (e.g., results/) to scan for subdirectories ending in \'FULL_GAME\'. '
|
|
'Output JSONs are saved in a \'<scan_dir>/json/\' subdirectory.')
|
|
|
|
args = parser.parse_args()
|
|
|
|
if args.input_csv:
|
|
input_csv_path_arg = os.path.abspath(args.input_csv)
|
|
if not os.path.exists(input_csv_path_arg):
|
|
print(f"Error: Input CSV file does not exist at {input_csv_path_arg}")
|
|
exit(1)
|
|
|
|
# For single file mode, game_id is the name of the parent directory of the CSV
|
|
# This matches the original behavior for llm_responses.csv inside a game-specific folder
|
|
game_id_derived = os.path.basename(os.path.dirname(input_csv_path_arg))
|
|
|
|
# Output JSON in the same directory as the input CSV
|
|
output_filename = os.path.splitext(os.path.basename(input_csv_path_arg))[0] + "_rl.json"
|
|
output_json_file_path = os.path.join(os.path.dirname(input_csv_path_arg), output_filename)
|
|
|
|
print(f"Starting single file conversion for: {input_csv_path_arg}")
|
|
print(f" Game ID (derived from parent folder): {game_id_derived}")
|
|
print(f" Outputting to: {output_json_file_path}")
|
|
success = convert_csv_to_rl_json(input_csv_path_arg, output_json_file_path, game_id_derived)
|
|
if success:
|
|
print("Single file conversion successful.")
|
|
else:
|
|
print("Single file conversion failed.")
|
|
|
|
elif args.scan_dir:
|
|
scan_directory_arg = os.path.abspath(args.scan_dir)
|
|
if not os.path.isdir(scan_directory_arg):
|
|
print(f"Error: Scan directory does not exist or is not a directory: {scan_directory_arg}")
|
|
exit(1)
|
|
|
|
output_base_dir = os.path.join(scan_directory_arg, "json")
|
|
os.makedirs(output_base_dir, exist_ok=True)
|
|
|
|
print(f"Starting batch conversion. Scanning directory: {scan_directory_arg}")
|
|
print(f"Outputting all JSON files to: {output_base_dir}")
|
|
|
|
processed_games_count = 0
|
|
found_target_dirs = 0
|
|
|
|
for item_name in os.listdir(scan_directory_arg):
|
|
item_path = os.path.join(scan_directory_arg, item_name)
|
|
if os.path.isdir(item_path) and item_name.endswith("FULL_GAME"):
|
|
found_target_dirs += 1
|
|
current_game_id = item_name
|
|
csv_to_process = os.path.join(item_path, "llm_responses.csv")
|
|
output_json_for_game = os.path.join(output_base_dir, f"{current_game_id}_rl.json")
|
|
|
|
if os.path.exists(csv_to_process):
|
|
print(f"Processing game directory: {item_name}")
|
|
print(f" Input CSV: {csv_to_process}")
|
|
if convert_csv_to_rl_json(csv_to_process, output_json_for_game, current_game_id):
|
|
processed_games_count += 1
|
|
else:
|
|
print(f"Warning: 'llm_responses.csv' not found in directory {item_path}. Skipping this directory.")
|
|
|
|
if found_target_dirs == 0:
|
|
print(f"No subdirectories ending with 'FULL_GAME' found in {scan_directory_arg}.")
|
|
elif processed_games_count == 0 and found_target_dirs > 0:
|
|
print(f"Found {found_target_dirs} director(y/ies) ending with 'FULL_GAME', but none contained 'llm_responses.csv' or failed processing.")
|
|
else:
|
|
print(f"Batch conversion completed. Successfully processed {processed_games_count} out of {found_target_dirs} found 'FULL_GAME' director(y/ies).")
|