Shannon Sands 2025-05-26 16:59:39 +10:00
parent 98d9ef87a2
commit de3bf7c505
62 changed files with 1270 additions and 706 deletions

@@ -0,0 +1,217 @@
"""
DynastAI Environment - Atropos-compatible Python RL environment
This module implements the DynastAI game environment as an Atropos environment.
It integrates with FastAPI for the web interface and OpenRouter for card generation.
"""
import os
import uuid
from typing import Dict, List, Tuple, Union

from dotenv import load_dotenv

# Try to import from atroposlib, but provide fallbacks for standalone mode
try:
    from atroposlib.envs.base import BaseEnv, BaseEnvConfig
    from atroposlib.envs.server_handling.server_baseline import ServerBaseline
    from atroposlib.envs.server_handling.server_manager import APIServerConfig

    HAS_ATROPOSLIB = True
except ImportError:
    # Create minimal stub classes so the module still loads without atroposlib
    class BaseEnvConfig:
        pass

    class BaseEnv:
        def __init__(self, *args, **kwargs):
            pass

    class ServerBaseline:
        pass

    class APIServerConfig:
        pass

    HAS_ATROPOSLIB = False
    print("Running in standalone mode without atroposlib")

from .game_logic import GameState, apply_choice_effects

# Load environment variables from a .env file, if present
load_dotenv()


class DynastAIEnvConfig(BaseEnvConfig):
    """
    Configuration class for the DynastAI environment.
    """

    card_template_count: int = 400  # Number of base card templates
    api_host: str = "localhost"
    api_port: int = 9001
    openrouter_api_key: str = os.getenv("OPENROUTER_API_KEY", "")
    llm_model: str = "qwen/Qwen1.5-7B"  # Default model to use
    initial_category_weights: Dict[str, int] = {
        "power": 50,
        "stability": 50,
        "piety": 50,
        "wealth": 50,
    }
    # Optional web UI configuration
    web_ui: bool = True
    web_port: int = 3000
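
# Example (illustrative): in standalone mode the BaseEnvConfig stub takes no
# keyword arguments, so defaults can be tweaked by assignment after
# construction; with atroposlib installed, BaseEnvConfig may accept kwargs
# directly.
#
#   config = DynastAIEnvConfig()
#   config.api_port = 9100
#   config.web_ui = False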


class DynastAIEnv(BaseEnv):
    """
    DynastAI Atropos environment.
    """

    name = "dynastai"
    env_config_cls = DynastAIEnvConfig

    def __init__(
        self,
        config: DynastAIEnvConfig = None,
        server_configs: Union[ServerBaseline, List[APIServerConfig]] = None,
        slurm=False,
        testing=False,
    ):
        # Fall back to a default config if none is provided, so both the
        # atroposlib and standalone paths see a valid config
        if config is None:
            config = DynastAIEnvConfig()
        if HAS_ATROPOSLIB:
            super().__init__(config, server_configs, slurm, testing)
        self.config = config
        # Game state storage (in-memory, keyed by session_id)
        self.game_states: Dict[str, GameState] = {}
        # Category weights for adaptive card selection
        self.category_weights = config.initial_category_weights.copy()
        # Replay storage
        self.replay_history = []

    @classmethod
    def config_init(
        cls,
    ) -> Tuple[DynastAIEnvConfig, Union[ServerBaseline, List[APIServerConfig]]]:
        """
        Initialize the default environment configuration and server baseline.
        """
        return cls.env_config_cls(), ServerBaseline()

    async def reset(self):
        """
        Reset the environment by starting a new game session.
        """
        session_id = str(uuid.uuid4())
        self.game_states[session_id] = GameState()
        return {
            "session_id": session_id,
            "metrics": self.game_states[session_id].get_metrics(),
        }

    async def step(self, action):
        """
        Take a step in the environment based on the action.

        Parameters:
        - action: dict containing action information, including:
            - session_id: string
            - choice: "yes" or "no"

        Returns:
        - observation: dict containing the new state
        - reward: float
        - done: boolean
        - info: dict with additional info
        """
        session_id = action.get("session_id")
        choice = action.get("choice")
        if session_id not in self.game_states:
            raise ValueError(f"Invalid session ID: {session_id}")
        game_state = self.game_states[session_id]

        # Apply the effects of the choice to the game state
        is_game_over, metrics, effects = apply_choice_effects(game_state, choice)

        # Generate the observation
        observation = {"metrics": metrics, "current_card": game_state.current_card}

        # The reward is 0 unless the game is over
        reward = 0.0
        done = is_game_over
        if done:
            # Calculate the final reward via the adaptive reward mechanism
            reward = self._calculate_adaptive_reward(game_state)
            # Update category weights for future card selection
            self._update_category_weights(game_state)

        # Additional info
        info = {"effects": effects}
        return observation, reward, done, info

    def _calculate_adaptive_reward(self, game_state):
        """
        Calculate the adaptive reward from the final game state:

            R = power_final * P + stability_final * S
                + piety_final * Pi + wealth_final * W

        where:
        - P = number of Power cards played this reign
        - S = number of Stability cards played this reign
        - Pi = number of Piety cards played this reign
        - W = number of Wealth cards played this reign
        """
        # Get the final metrics
        metrics = game_state.get_metrics()
        # Count cards played by category
        category_counts = game_state.get_category_counts()
        # Weight each final metric by how often its category was played
        reward = (
            metrics["power"] * category_counts["power"]
            + metrics["stability"] * category_counts["stability"]
            + metrics["piety"] * category_counts["piety"]
            + metrics["wealth"] * category_counts["wealth"]
        )
        return reward

    def _update_category_weights(self, game_state):
        """
        Update each category weight using an exponential moving average (EMA):

            weights[c] = 0.9 * weights[c] + 0.1 * (final_metric[c] * cards_played[c])

        for each category c in {power, stability, piety, wealth}, where
        cards_played[c] counts the cards of that category in the last reign.
        """
        alpha = 0.9  # Weight for the old value
        beta = 0.1  # Weight for the new value
        # Get the final metrics
        metrics = game_state.get_metrics()
        # Count cards played by category
        category_counts = game_state.get_category_counts()
        # Blend each old weight with the new (metric * count) signal
        for category in self.category_weights:
            self.category_weights[category] = alpha * self.category_weights[
                category
            ] + beta * (metrics[category] * category_counts[category])
        # Log the updated weights
        print(f"Updated category weights: {self.category_weights}")