fixing output json & rendering!

2026-04-19 12:58:09 +00:00 · 2025-05-31 14:26:37 -04:00 · 2025-05-31 14:26:37 -04:00 · d60c969b1c
commit d60c969b1c
parent 9253b2198a
3 changed files with 377 additions and 177 deletions
--- a/analyze_game_moments_llm.py
+++ b/analyze_game_moments_llm.py
@ -18,7 +18,7 @@ import logging
 import csv
 from pathlib import Path
 from typing import Dict, List, Optional, Any
-from dataclasses import dataclass, asdict
+from dataclasses import dataclass, asdict, field
 from datetime import datetime
 import os
 from dotenv import load_dotenv
@ -48,6 +48,7 @@ class GameMoment:
    raw_messages: List[Dict]
    raw_orders: Dict
    diary_context: Dict[str, str]  # New field for diary entries
+    state_update_context: Dict[str, str] = None  # New field for state updates

@dataclass
 class Lie:
@ -75,6 +76,7 @@ class GameAnalyzer:
        self.power_to_model = None
        self.moments = []
        self.diary_entries = {}  # phase -> power -> diary content
+        self.state_updates = {}  # phase -> power -> state update content
        self.invalid_moves_by_model = {} # Initialize attribute
        self.lies = []  # Track detected lies
        self.lies_by_model = {}  # model -> {intentional: count, unintentional: count}
@ -100,6 +102,10 @@ class GameAnalyzer:
        self.diary_entries = self.parse_llm_responses_csv()
        logger.info(f"Loaded diary entries for {len(self.diary_entries)} phases")
        
+        # Load state updates from CSV
+        self.state_updates = self.parse_state_updates_csv()
+        logger.info(f"Loaded state updates for {len(self.state_updates)} phases")
+        
        # Load invalid moves data from CSV
        self.invalid_moves_by_model = self.parse_invalid_moves_from_csv()
        logger.info(f"Loaded invalid moves for {len(self.invalid_moves_by_model)} models")
@ -185,6 +191,87 @@ class GameAnalyzer:
            logger.error(f"Error parsing CSV file: {e}")
            return {}
    
+    def parse_state_updates_csv(self) -> Dict[str, Dict[str, str]]:
+        """Parse the CSV file to extract state updates by phase and power"""
+        state_updates = {}
+        
+        try:
+            import pandas as pd
+            # Use pandas for more robust CSV parsing
+            df = pd.read_csv(self.csv_path)
+            
+            # Filter for state update entries
+            state_df = df[df['response_type'] == 'state_update']
+            
+            for _, row in state_df.iterrows():
+                phase = row['phase']
+                power = row['power']
+                raw_response = str(row['raw_response']).strip()
+                
+                if phase not in state_updates:
+                    state_updates[phase] = {}
+                
+                try:
+                    # Try to parse as JSON first
+                    response = json.loads(raw_response)
+                    state_content = f"Reasoning: {response.get('reasoning', 'N/A')}\n"
+                    state_content += f"Relationships: {response.get('relationships', {})}\n"
+                    goals = response.get('goals', [])
+                    if isinstance(goals, list):
+                        state_content += f"Goals: {'; '.join(goals)}"
+                    else:
+                        state_content += f"Goals: {goals}"
+                    state_updates[phase][power] = state_content
+                except (json.JSONDecodeError, TypeError):
+                    # If JSON parsing fails, use a simplified version or skip
+                    if raw_response and raw_response.lower() not in ['null', 'nan', 'none']:
+                        state_updates[phase][power] = f"Raw state update: {raw_response}"
+                    
+            logger.info(f"Successfully parsed {len(state_updates)} phases with state updates")
+            return state_updates
+            
+        except ImportError:
+            # Fallback to standard CSV if pandas not available
+            logger.info("Pandas not available, using standard CSV parsing for state updates")
+            import csv
+            
+            with open(self.csv_path, 'r', encoding='utf-8') as f:
+                reader = csv.DictReader(f)
+                for row in reader:
+                    try:
+                        if row.get('response_type') == 'state_update':
+                            phase = row.get('phase', '')
+                            power = row.get('power', '')
+                            
+                            if phase and power:
+                                if phase not in state_updates:
+                                    state_updates[phase] = {}
+                                
+                                raw_response = row.get('raw_response', '').strip()
+                                
+                                try:
+                                    # Try to parse as JSON
+                                    response = json.loads(raw_response)
+                                    state_content = f"Reasoning: {response.get('reasoning', 'N/A')}\n"
+                                    state_content += f"Relationships: {response.get('relationships', {})}\n"
+                                    goals = response.get('goals', [])
+                                    if isinstance(goals, list):
+                                        state_content += f"Goals: {'; '.join(goals)}"
+                                    else:
+                                        state_content += f"Goals: {goals}"
+                                    state_updates[phase][power] = state_content
+                                except (json.JSONDecodeError, TypeError):
+                                    if raw_response and raw_response != "null":
+                                        state_updates[phase][power] = f"Raw state update: {raw_response}"
+                    except Exception as e:
+                        continue  # Skip problematic rows
+                        
+            return state_updates
+            
+        except Exception as e:
+            logger.error(f"Error parsing state updates from CSV file: {e}")
+            return {}
+    
    def parse_invalid_moves_from_csv(self) -> Dict[str, int]:
        """Parse the CSV file to count invalid moves by model"""
        invalid_moves_by_model = {}
@ -248,13 +335,17 @@ class GameAnalyzer:
        # Get diary entries for this phase
        phase_diaries = self.diary_entries.get(phase_name, {})
        
+        # Get state updates for this phase
+        phase_state_updates = self.state_updates.get(phase_name, {})
+        
        return {
            "phase": phase_name,
            "messages": phase_data.get("messages", []),
            "orders": phase_data.get("orders", {}),
            "summary": phase_data.get("summary", ""),
            "statistical_summary": phase_data.get("statistical_summary", {}),
-            "diaries": phase_diaries
+            "diaries": phase_diaries,
+            "state_updates": phase_state_updates
        }
    
    def create_analysis_prompt(self, turn_data: Dict) -> str:
@ -288,27 +379,43 @@ class GameAnalyzer:
            power_str = f"{power} ({power_model})" if power_model else power
            formatted_diaries.append(f"{power_str} DIARY:\n{diary}")
        
-        prompt = f"""You are analyzing diplomatic negotiations and subsequent military orders from a Diplomacy game. Your task is to identify key strategic moments in the following categories:
+        # Format state updates
+        formatted_state_updates = []
+        for power, state_update in turn_data.get("state_updates", {}).items():
+            power_model = self.power_to_model.get(power, '')
+            power_str = f"{power} ({power_model})" if power_model else power
+            formatted_state_updates.append(f"{power_str} STATE UPDATE:\n{state_update}")
+        
+        prompt = f"""You are analyzing diplomatic negotiations and subsequent military orders from a Diplomacy game. Your task is to identify ONLY the most significant strategic moments.

-1. BETRAYAL: When a power explicitly promises one action but takes a contradictory action
-2. COLLABORATION: When powers successfully coordinate as agreed
-3. PLAYING_BOTH_SIDES: When a power makes conflicting promises to different parties
-4. BRILLIANT_STRATEGY: Exceptionally well-executed strategic maneuvers that gain significant advantage
-5. STRATEGIC_BLUNDER: Major strategic mistakes that significantly weaken a power's position
+CRITICAL: 90% of game turns contain NO moments worth reporting. Only identify moments that meet these strict criteria:

-IMPORTANT SCORING GUIDELINES:
- Scores 1-3: Minor or routine diplomatic events
- Scores 4-6: Significant but expected diplomatic maneuvers
- Scores 7-8: Notable strategic moments with clear impact
- Scores 9-10: EXCEPTIONAL moments that are truly dramatic or game-changing
+CATEGORIES:
+1. BETRAYAL: Explicit promise broken that directly causes supply center loss
+2. COLLABORATION: Successful coordination that captures/defends supply centers
+3. PLAYING_BOTH_SIDES: Conflicting promises that manipulate the game's outcome
+4. BRILLIANT_STRATEGY: Moves that gain 2+ centers or save from elimination
+5. STRATEGIC_BLUNDER: Errors that lose 2+ centers or enable enemy victory

-Reserve high scores (8+) for:
- Major betrayals that fundamentally shift alliances
- Successful coordinated attacks on major powers
- Clever deceptions that fool multiple powers
- Brilliant strategic maneuvers that dramatically improve position
- Catastrophic strategic errors with lasting consequences
- Actions that dramatically alter the game's balance
+STRICT SCORING RUBRIC:
+- Scores 1-6: DO NOT REPORT THESE. Routine diplomacy, expected moves.
+- Score 7: Supply center changes hands due to this specific action
+- Score 8: Multiple centers affected or major power dynamic shift
+- Score 9: Completely alters the game trajectory (power eliminated, alliance system collapses)
+- Score 10: Once-per-game brilliance or catastrophe that determines the winner
+
+REQUIREMENTS FOR ANY REPORTED MOMENT:
+✓ Supply centers must change hands as a direct result
+✓ The action must be surprising given prior context
+✓ The impact must be immediately measurable
+✓ This must be a top-20 moment in the entire game
+
+Examples of what NOT to report:
+- Routine support orders that work as planned
+- Minor position improvements
+- Vague diplomatic promises
+- Failed attacks with no consequences
+- Defensive holds that maintain status quo

 For this turn ({turn_data.get('phase', '')}), analyze:

@ -324,6 +431,9 @@ ORDERS:
 TURN SUMMARY:
 {turn_data.get('summary', 'No summary available')}

+STATE UPDATES (Powers' reactions after seeing results):
+{chr(10).join(formatted_state_updates) if formatted_state_updates else 'No state updates available'}
+
 Identify ALL instances that fit the five categories. For each instance provide:
 {{
    "category": "BETRAYAL" or "COLLABORATION" or "PLAYING_BOTH_SIDES" or "BRILLIANT_STRATEGY" or "STRATEGIC_BLUNDER",
@ -391,7 +501,8 @@ PROVIDE YOUR RESPONSE BELOW:"""
                    interest_score=float(moment.get("interest_score", 5)),
                    raw_messages=turn_data["messages"],
                    raw_orders=turn_data["orders"],
-                    diary_context=turn_data["diaries"]
+                    diary_context=turn_data["diaries"],
+                    state_update_context=turn_data["state_updates"]
                )
                moments.append(game_moment)
                logger.info(f"Detected {game_moment.category} in {game_moment.phase} "
@ -663,7 +774,7 @@ PROVIDE YOUR RESPONSE BELOW:"""
                        break
        
        # Create the narrative prompt
-        narrative_prompt = f"""Generate a dramatic narrative of this Diplomacy game that covers the ENTIRE game from beginning to end.
+        narrative_prompt = f"""Generate a dramatic narrative of this Diplomacy game that covers the ENTIRE game from beginning to end. You should not spend too much time on any one phase. You should be telling stories across the whole game, focusing on the most important moments. Don't repeat yourself. Really think about the art of storytelling here and how to make this engaging, highlighting both the power and the model itself, which is more interesting throughout. Make sure you call back to relationships that used to exist and how things change throughout, and culminate in a satisfying ending.

 POWER MODELS:
 {chr(10).join([f"- {power}: {model}" for power, model in self.power_to_model.items()])}
@ -700,6 +811,8 @@ Write a compelling narrative that:
 7. Names each power with their model in parentheses (e.g., "France (claude-opus-4-20250514)")
 8. Is written as a single flowing paragraph
 9. Captures the drama and tension of the entire game
+10. Is well formatted with great spacing that makes it easy to read and breaks phases of the game into paragraphs
+11. The whole thing should be relatively concise

 PROVIDE YOUR NARRATIVE BELOW:"""
        
@ -715,14 +828,9 @@ PROVIDE YOUR NARRATIVE BELOW:"""
        """Generate the full analysis report matching the exact format of existing reports"""
        # Generate output path if not provided
        if not output_path:
-            # Create game_moments directory in project root
-            game_moments_dir = Path(os.path.dirname(os.path.abspath(__file__))) / "game_moments"
-            game_moments_dir.mkdir(exist_ok=True)
-            
-            # Extract game name from results folder
-            results_name = os.path.basename(os.path.normpath(str(self.results_folder)))
+            # Save directly in the results folder
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            output_path = game_moments_dir / f"{results_name}_report_{timestamp}.md"
+            output_path = self.results_folder / f"game_moments_report_{timestamp}.md"
        
        # Ensure the parent directory exists
        output_path = Path(output_path)
@ -835,6 +943,13 @@ Game: {self.game_data_path}
                for power in moment.powers_involved:
                    if power in moment.diary_context:
                        report += f"_{self.format_power_with_model(power)} Diary:_ {moment.diary_context[power]}\n\n"
+                
+                # Add state update context
+                if moment.state_update_context:
+                    report += "**State Update Context (Post-Action Reflections):**\n\n"
+                    for power in moment.powers_involved:
+                        if power in moment.state_update_context:
+                            report += f"_{self.format_power_with_model(power)} State Update:_ {moment.state_update_context[power]}\n\n"
        
        # Write to file
        with open(output_path, 'w', encoding='utf-8') as f:
@ -844,76 +959,10 @@ Game: {self.game_data_path}
        return str(output_path)
    
    def save_json_results(self, output_path: Optional[str] = None) -> str:
-        """Save all moments and lies as JSON for further analysis"""
+        """Save all moments and lies as JSON in a unified format that works for both analysis and animation"""
        # Generate output path if not provided
        if not output_path:
-            # Create game_moments directory in project root
-            game_moments_dir = Path(os.path.dirname(os.path.abspath(__file__))) / "game_moments"
-            game_moments_dir.mkdir(exist_ok=True)
-            
-            # Extract game name from results folder
-            results_name = os.path.basename(os.path.normpath(str(self.results_folder)))
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            output_path = game_moments_dir / f"{results_name}_data_{timestamp}.json"
-        
-        output_path = Path(output_path)
-        output_path.parent.mkdir(parents=True, exist_ok=True)
-        
-        # Prepare comprehensive data for JSON serialization
-        full_data = {
-            "metadata": {
-                "game_results_folder": str(self.results_folder),
-                "analysis_timestamp": datetime.now().isoformat(),
-                "model_used": self.model_name,
-                "game_data_path": str(self.game_data_path),
-                "power_to_model": self.power_to_model
-            },
-            "analysis_results": {
-                "moments": [self._moment_to_dict(moment) for moment in self.moments],
-                "lies": [asdict(lie) for lie in self.lies],
-                "invalid_moves_by_model": self.invalid_moves_by_model
-            },
-            "summary": {
-                "total_moments": len(self.moments),
-                "total_lies": len(self.lies),
-                "moments_by_category": {
-                    "BETRAYAL": len([m for m in self.moments if m.category == "BETRAYAL"]),
-                    "COLLABORATION": len([m for m in self.moments if m.category == "COLLABORATION"]),
-                    "PLAYING_BOTH_SIDES": len([m for m in self.moments if m.category == "PLAYING_BOTH_SIDES"]),
-                    "BRILLIANT_STRATEGY": len([m for m in self.moments if m.category == "BRILLIANT_STRATEGY"]),
-                    "STRATEGIC_BLUNDER": len([m for m in self.moments if m.category == "STRATEGIC_BLUNDER"])
-                },
-                "lies_by_power": {},
-                "intentional_lies": len([l for l in self.lies if l.intentional]),
-                "unintentional_lies": len([l for l in self.lies if not l.intentional]),
-                "score_distribution": {
-                    "9-10": len([m for m in self.moments if m.interest_score >= 9]),
-                    "7-8": len([m for m in self.moments if 7 <= m.interest_score < 9]),
-                    "4-6": len([m for m in self.moments if 4 <= m.interest_score < 7]),
-                    "1-3": len([m for m in self.moments if m.interest_score < 4])
-                }
-            },
-            "phases_analyzed": list(set(moment.phase for moment in self.moments))
-        }
-        
-        # Count lies by power
-        for lie in self.lies:
-            if lie.liar not in full_data["summary"]["lies_by_power"]:
-                full_data["summary"]["lies_by_power"][lie.liar] = 0
-            full_data["summary"]["lies_by_power"][lie.liar] += 1
-        
-        # Write to file with proper formatting
-        with open(output_path, 'w', encoding='utf-8') as f:
-            json.dump(full_data, f, indent=2, ensure_ascii=False)
-        
-        logger.info(f"JSON results saved: {output_path}")
-        return str(output_path)
-    
-    def save_moments_for_animation(self, output_path: Optional[str] = None) -> str:
-        """Save moments in the format expected by the animation project"""
-        # Generate output path if not provided
-        if not output_path:
-            # Default to moments.json in the same directory as the game data
+            # Save directly in the results folder as moments.json for direct use
            output_path = self.results_folder / "moments.json"
        
        output_path = Path(output_path)
@ -936,8 +985,9 @@ Game: {self.game_data_path}
            "scores_1_3": len([m for m in self.moments if m.interest_score < 4])
        }
        
-        # Prepare data in the expected format
-        animation_data = {
+        # Prepare unified data structure that includes all information
+        unified_data = {
+            # Animation-compatible metadata
            "metadata": {
                "timestamp": datetime.now().isoformat(),
                "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
@ -945,43 +995,75 @@ Game: {self.game_data_path}
                "analysis_model": self.model_name,
                "total_moments": len(self.moments),
                "moment_categories": category_counts,
-                "score_distribution": score_dist
+                "score_distribution": score_dist,
+                # Additional comprehensive metadata
+                "game_results_folder": str(self.results_folder),
+                "analysis_timestamp": datetime.now().isoformat(),
+                "model_used": self.model_name,
+                "game_data_path": str(self.game_data_path),
+                "power_to_model": self.power_to_model
            },
+            # Power models at root level for animation
            "power_models": self.power_to_model,
-            "moments": []
+            # Moments at root level for animation
+            "moments": [self._moment_to_dict(moment) for moment in self.moments],
+            # Analysis results in nested structure for comprehensive analysis
+            "analysis_results": {
+                "moments": [self._moment_to_dict(moment) for moment in self.moments],
+                "lies": [asdict(lie) for lie in self.lies],
+                "invalid_moves_by_model": self.invalid_moves_by_model
+            },
+            "summary": {
+                "total_moments": len(self.moments),
+                "total_lies": len(self.lies),
+                "moments_by_category": {
+                    "BETRAYAL": category_counts["betrayals"],
+                    "COLLABORATION": category_counts["collaborations"],
+                    "PLAYING_BOTH_SIDES": category_counts["playing_both_sides"],
+                    "BRILLIANT_STRATEGY": category_counts["brilliant_strategies"],
+                    "STRATEGIC_BLUNDER": category_counts["strategic_blunders"]
+                },
+                "lies_by_power": {},
+                "intentional_lies": len([l for l in self.lies if l.intentional]),
+                "unintentional_lies": len([l for l in self.lies if not l.intentional]),
+                "score_distribution": {
+                    "9-10": score_dist["scores_9_10"],
+                    "7-8": score_dist["scores_7_8"],
+                    "4-6": score_dist["scores_4_6"],
+                    "1-3": score_dist["scores_1_3"]
+                }
+            },
+            "phases_analyzed": list(set(moment.phase for moment in self.moments))
        }
        
-        # Format moments for animation
-        for moment in self.moments:
-            # Create diary context with only the powers involved
-            diary_context = {}
-            for power in ["AUSTRIA", "ENGLAND", "FRANCE", "GERMANY", "ITALY", "RUSSIA", "TURKEY"]:
-                if power in moment.diary_context:
-                    diary_context[power] = moment.diary_context[power]
-                else:
-                    diary_context[power] = ""
-            
-            animation_moment = {
-                "phase": moment.phase,
-                "category": moment.category,
-                "powers_involved": moment.powers_involved,
-                "promise_agreement": moment.promise_agreement,
-                "actual_action": moment.actual_action,
-                "impact": moment.impact,
-                "interest_score": moment.interest_score,
-                "diary_context": diary_context
-            }
-            animation_data["moments"].append(animation_moment)
+        # Count lies by power
+        for lie in self.lies:
+            if lie.liar not in unified_data["summary"]["lies_by_power"]:
+                unified_data["summary"]["lies_by_power"][lie.liar] = 0
+            unified_data["summary"]["lies_by_power"][lie.liar] += 1
        
        # Write to file with proper formatting
        with open(output_path, 'w', encoding='utf-8') as f:
-            json.dump(animation_data, f, indent=2, ensure_ascii=False)
+            json.dump(unified_data, f, indent=2, ensure_ascii=False)
        
-        logger.info(f"Animation moments saved: {output_path}")
+        logger.info(f"Unified JSON results saved: {output_path}")
        return str(output_path)
    
+    
    def _moment_to_dict(self, moment: GameMoment) -> dict:
        """Convert a GameMoment to a dictionary with all fields"""
+        # Ensure all powers have entries in diary_context
+        all_powers = ["AUSTRIA", "ENGLAND", "FRANCE", "GERMANY", "ITALY", "RUSSIA", "TURKEY"]
+        normalized_diary = {}
+        for power in all_powers:
+            normalized_diary[power] = moment.diary_context.get(power, "")
+        
+        # Ensure all powers have entries in state_update_context
+        normalized_state_update = {}
+        state_update_context = moment.state_update_context if hasattr(moment, 'state_update_context') else {}
+        for power in all_powers:
+            normalized_state_update[power] = state_update_context.get(power, "")
+        
        return {
            "phase": moment.phase,
            "category": moment.category,
@ -992,7 +1074,8 @@ Game: {self.game_data_path}
            "interest_score": moment.interest_score,
            "raw_messages": moment.raw_messages,
            "raw_orders": moment.raw_orders,
-            "diary_context": moment.diary_context
+            "diary_context": normalized_diary,
+            "state_update_context": normalized_state_update
        }

 async def main():
@ -1003,8 +1086,7 @@ async def main():
                       help='Model to use for analysis')
    parser.add_argument('--max-phases', type=int, help='Maximum number of phases to analyze')
    parser.add_argument('--output', help='Output file path for the markdown report')
-    parser.add_argument('--json', help='Output file path for the JSON data')
-    parser.add_argument('--animation-moments', help='Output file path for animation-compatible moments.json')
+    parser.add_argument('--json', help='Output file path for the JSON data (defaults to moments.json in results folder)')
    
    args = parser.parse_args()
    
@ -1018,45 +1100,23 @@ async def main():
    await analyzer.analyze_game(max_phases=args.max_phases)
    
    # Generate coordinated outputs
-    # If neither output path is specified, generate both with matching timestamps
-    if not args.output and not args.json:
-        # Create game_moments directory in project root
-        game_moments_dir = Path(os.path.dirname(os.path.abspath(__file__))) / "game_moments"
-        game_moments_dir.mkdir(exist_ok=True)
-        
-        # Extract game name from results folder
-        results_name = os.path.basename(os.path.normpath(args.results_folder))
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        
-        # Generate both paths with same timestamp
-        report_path = game_moments_dir / f"{results_name}_report_{timestamp}.md"
-        json_path = game_moments_dir / f"{results_name}_data_{timestamp}.json"
-        
-        # Generate both outputs
-        report_path = await analyzer.generate_report(report_path)
-        json_path = analyzer.save_json_results(json_path)
-    else:
-        # Generate outputs with specified paths
-        report_path = await analyzer.generate_report(args.output)
-        json_path = analyzer.save_json_results(args.json) if args.json else None
+    # Always generate the report
+    report_path = await analyzer.generate_report(args.output)
    
-    # Generate animation moments file if requested or by default
-    animation_path = None
-    if args.animation_moments:
-        animation_path = analyzer.save_moments_for_animation(args.animation_moments)
+    # Generate JSON output - unified format that works for both analysis and animation
+    if args.json:
+        # Use the specified path
+        json_path = analyzer.save_json_results(args.json)
    else:
-        # Always generate moments.json in the results folder for animation
-        animation_path = analyzer.save_moments_for_animation()
+        # Default to moments.json in the results folder
+        json_path = analyzer.save_json_results()
    
    # Print summary
    print(f"\nAnalysis Complete!")
    print(f"Found {len(analyzer.moments)} key moments")
    print(f"Detected {len(analyzer.lies)} lies")
    print(f"\nReport saved to: {report_path}")
-    if json_path:
-        print(f"JSON data saved to: {json_path}")
-    if animation_path:
-        print(f"Animation moments saved to: {animation_path}")
+    print(f"JSON data saved to: {json_path}")
    
    # Show score distribution
    print("\nScore Distribution:")