Add support for willow-alpha-2025-11-03 model

Added willow-alpha-2025-11-03 to the OpenAI-compatible clients with the required parameter handling (sketched below):
- Uses max_completion_tokens instead of max_tokens
- Enforces temperature=1.0 (model requirement)
- Added to all relevant client classes (OpenAIClient, DeepSeekClient, OpenAIResponsesClient, RequestsOpenAIClient)
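
A minimal sketch of the intended parameter selection (illustrative only; the helper name is made up, the model lists mirror the diff below):

def build_completion_params(model_name: str, max_tokens: int, temperature: float) -> dict:
    params = {"model": model_name}
    # These models reject max_tokens and expect max_completion_tokens instead
    uses_max_completion_tokens = (
        model_name in ["o4-mini", "o3-mini", "o3", "gpt-4.1", "gpt-5-mini", "willow-alpha-2025-11-03"]
        or model_name.startswith("nectarine")
    )
    if uses_max_completion_tokens:
        params["max_completion_tokens"] = max_tokens
    else:
        params["max_tokens"] = max_tokens
    # willow-alpha-2025-11-03 (like o3/o4-mini/gpt-5-mini) only supports the default temperature of 1.0
    if model_name in ["o4-mini", "o3-mini", "o3", "gpt-5-mini", "willow-alpha-2025-11-03"]:
        params["temperature"] = 1.0
    else:
        params["temperature"] = temperature
    return params

# build_completion_params("willow-alpha-2025-11-03", 4096, 0.2)
#   -> {"model": ..., "max_completion_tokens": 4096, "temperature": 1.0}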

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
AlxAI 2025-11-17 21:19:58 -05:00
parent 4ee7c2f68a
commit 701790a37a


@@ -19,6 +19,7 @@ from enum import StrEnum
import google.generativeai as genai
from together import AsyncTogether
from together.error import APIError as TogetherAPIError # For specific error handling
from groq import AsyncGroq
from config import config
from .game_history import GameHistory
@@ -821,14 +822,14 @@ class OpenAIClient(BaseModelClient):
# Handle model-specific parameters
# Check if model name starts with 'nectarine' or is in the specific list
uses_max_completion_tokens = (
self.model_name in ["o4-mini", "o3-mini", "o3", "gpt-4.1"] or
self.model_name in ["o4-mini", "o3-mini", "o3", "gpt-4.1", "gpt-5-mini", "willow-alpha-2025-11-03"] or
self.model_name.startswith("nectarine")
)
if uses_max_completion_tokens:
completion_params["max_completion_tokens"] = self.max_tokens
# o4-mini, o3-mini, o3 only support default temperature of 1.0
if self.model_name in ["o4-mini", "o3-mini", "o3"]:
# o4-mini, o3-mini, o3, gpt-5-mini, willow-alpha-2025-11-03 only support default temperature of 1.0
if self.model_name in ["o4-mini", "o3-mini", "o3", "gpt-5-mini", "willow-alpha-2025-11-03"]:
completion_params["temperature"] = 1.0
else:
completion_params["temperature"] = temperature
@@ -886,7 +887,30 @@ class ClaudeClient(BaseModelClient):
def __init__(self, model_name: str, prompts_dir: Optional[str] = None):
super().__init__(model_name, prompts_dir=prompts_dir)
self.client = AsyncAnthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
# Check if this is the special claude-sonnet-4 model with :1m suffix
self.use_context_1m = False
self.actual_model_name = model_name
if model_name == "claude-sonnet-4-20250514:1m":
self.use_context_1m = True
self.actual_model_name = "claude-sonnet-4-20250514"
logger.info(f"[{model_name}] Using context-1m-2025-08-07 beta header")
# Check if this is a MiniMax model (uses an Anthropic-compatible API)
base_url = None
api_key = os.environ.get("ANTHROPIC_API_KEY")
if "minimax" in model_name.lower():
base_url = os.environ.get("ANTHROPIC_BASE_URL", "https://api.minimax.io/anthropic")
api_key = os.environ.get("MINIMAX_API_KEY") or api_key
logger.info(f"[{model_name}] Using MiniMax API with base_url: {base_url}")
# Initialize client with optional base_url
if base_url:
self.client = AsyncAnthropic(api_key=api_key, base_url=base_url)
else:
self.client = AsyncAnthropic(api_key=api_key)
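# A minimal, standalone sketch of the routing above (illustrative only; the helper name and
# return shape are assumptions, the suffix and base_url match the logic in __init__):
def resolve_anthropic_config(model_name: str) -> dict:
    cfg = {"model": model_name, "base_url": None, "use_context_1m": False}
    if model_name == "claude-sonnet-4-20250514:1m":
        # :1m strips down to the real model name and opts into the 1M-context beta
        cfg["model"] = "claude-sonnet-4-20250514"
        cfg["use_context_1m"] = True
    if "minimax" in model_name.lower():
        # MiniMax exposes an Anthropic-compatible endpoint
        cfg["base_url"] = "https://api.minimax.io/anthropic"
    return cfg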
async def generate_response(self, prompt: str, temperature: float = 0.0, inject_random_seed: bool = True) -> str:
# Updated Claude messages format
@@ -896,16 +920,42 @@ class ClaudeClient(BaseModelClient):
random_seed = generate_random_seed()
system_prompt_content = f"{random_seed}\n\n{self.system_prompt}"
response = await self.client.messages.create(
model=self.model_name,
max_tokens=self.max_tokens,
system=system_prompt_content, # system is now a top-level parameter
messages=[{"role": "user", "content": prompt + "\n\nPROVIDE YOUR RESPONSE BELOW:"}],
temperature=temperature,
)
if not response.content or not response.content[0].text:
raise ValueError(f"[{self.model_name}] LLM returned an empty or invalid response.")
return response.content[0].text.strip()
# Prepare the create parameters
create_params = {
"model": self.actual_model_name,
"max_tokens": self.max_tokens,
"system": system_prompt_content, # system is now a top-level parameter
"messages": [{"role": "user", "content": prompt + "\n\nPROVIDE YOUR RESPONSE BELOW:"}],
"temperature": temperature,
}
# Use beta endpoint if needed
if self.use_context_1m:
create_params["betas"] = ["context-1m-2025-08-07"]
response = await self.client.beta.messages.create(**create_params)
else:
response = await self.client.messages.create(**create_params)
if not response.content:
raise ValueError(f"[{self.model_name}] LLM returned an empty response.")
# Extract text from response content blocks
# Handle both TextBlock and ThinkingBlock (for MiniMax and reasoning models)
text_content = ""
for content_block in response.content:
if hasattr(content_block, 'text'):
# TextBlock with direct text attribute
text_content += content_block.text
elif hasattr(content_block, 'thinking'):
# ThinkingBlock - skip the thinking, we want the output text
continue
elif content_block.type == 'text' and hasattr(content_block, 'text'):
text_content += content_block.text
if not text_content:
raise ValueError(f"[{self.model_name}] LLM returned content but no text could be extracted.")
return text_content.strip()
except json.JSONDecodeError as json_err:
logger.error(f"[{self.model_name}] JSON decoding failed in generate_response: {json_err}")
raise
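# Standalone sketch of the text-extraction rule above (illustrative only): keep text blocks,
# skip thinking blocks, and fail loudly if nothing textual was returned.
def extract_response_text(content_blocks, model_name: str) -> str:
    text = ""
    for block in content_blocks:
        if hasattr(block, "text"):
            text += block.text  # TextBlock
        elif hasattr(block, "thinking"):
            continue  # ThinkingBlock: reasoning content is dropped
    if not text:
        raise ValueError(f"[{model_name}] LLM returned content but no text could be extracted.")
    return text.strip()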
@@ -1000,13 +1050,13 @@ class DeepSeekClient(BaseModelClient):
"stream": False,
"temperature": temperature,
}
# Use max_completion_tokens for o4-mini, o3-mini models and nectarine models
if self.model_name in ["o4-mini", "o3-mini"] or self.model_name.startswith("nectarine"):
# Use max_completion_tokens for o4-mini, o3-mini, willow-alpha-2025-11-03 models and nectarine models
if self.model_name in ["o4-mini", "o3-mini", "willow-alpha-2025-11-03"] or self.model_name.startswith("nectarine"):
completion_params["max_completion_tokens"] = self.max_tokens
else:
completion_params["max_tokens"] = self.max_tokens
response = await self.client.chat.completions.create(**completion_params)
logger.debug(f"[{self.model_name}] Raw DeepSeek response:\n{response}")
@@ -1044,6 +1094,71 @@ class DeepSeekClient(BaseModelClient):
raise
class KimiClient(BaseModelClient):
"""
For Kimi K2 models from Moonshot AI.
Models: kimi-k2-0905-preview, kimi-k2-0711-preview, kimi-k2-turbo-preview
"""
def __init__(self, model_name: str, prompts_dir: Optional[str] = None):
super().__init__(model_name, prompts_dir=prompts_dir)
self.api_key = os.environ.get("MOONSHOT_API_KEY")
if not self.api_key:
raise ValueError("MOONSHOT_API_KEY environment variable is required for KimiClient")
self.client = AsyncOpenAI(api_key=self.api_key, base_url="https://api.moonshot.ai/v1")
logger.info(f"[{self.model_name}] Initialized Kimi client")
async def generate_response(self, prompt: str, temperature: float = 0.0, inject_random_seed: bool = True) -> str:
try:
system_prompt_content = self.system_prompt
if inject_random_seed:
random_seed = generate_random_seed()
system_prompt_content = f"{random_seed}\n\n{self.system_prompt}"
prompt_with_cta = f"{prompt}\n\nPROVIDE YOUR RESPONSE BELOW:"
response = await self.client.chat.completions.create(
model=self.model_name,
messages=[
{"role": "system", "content": system_prompt_content},
{"role": "user", "content": prompt_with_cta},
],
max_tokens=self.max_tokens,
temperature=temperature,
)
if not response or not response.choices or not response.choices[0].message.content:
raise ValueError(f"[{self.model_name}] LLM returned an empty or invalid response.")
return response.choices[0].message.content.strip()
except Exception as e:
extra = ""
try:
from openai import OpenAIError
if isinstance(e, OpenAIError):
status = getattr(e, "status_code", None)
if status:
extra += f" (status {status})"
resp = getattr(e, "response", None)
if resp is not None:
try:
body = resp.json() if hasattr(resp, "json") else resp
except Exception:
body = str(resp)
body_str = (
json.dumps(body) if isinstance(body, (dict, list)) else str(body)
)
if len(body_str) > 3_000:
body_str = body_str[:3_000] + "…[truncated]"
extra += f" body: {body_str}"
except Exception:
pass
logger.error(f"[{self.model_name}] Kimi client error: {e}{extra}", exc_info=True)
raise
class OpenAIResponsesClient(BaseModelClient):
"""
For OpenAI o3-pro model using the new Responses API endpoint.
@@ -1095,13 +1210,13 @@ class OpenAIResponsesClient(BaseModelClient):
payload["max_output_tokens"] = self.max_tokens
# Only add temperature for models that support it
models_without_temp = ['o3', 'o4-mini', 'gpt-5-reasoning-alpha-2025-07-19', 'nectarine-alpha-2025-07-25', 'nectarine-alpha-new-reasoning-effort-2025-07-25']
if self.model_name not in models_without_temp:
models_without_temp = ['o3', 'o4-mini', 'gpt-5-reasoning-alpha-2025-07-19', 'willow-alpha-2025-11-03']
if self.model_name not in models_without_temp and not self.model_name.startswith("nectarine"):
payload["temperature"] = temperature
# Add reasoning effort for models that support it
reasoning_models = ['gpt-5-reasoning-alpha-2025-07-19', 'o4-mini', 'nectarine-alpha-2025-07-25', 'o4-mini-alpha-2025-07-11', 'nectarine-alpha-new-reasoning-effort-2025-07-25']
if self.reasoning_effort and self.model_name in reasoning_models:
reasoning_models = ['gpt-5-reasoning-alpha-2025-07-19', 'gpt-5', 'o4-mini', 'o4-mini-alpha-2025-07-11']
if self.reasoning_effort and (self.model_name in reasoning_models or self.model_name.startswith("nectarine")):
payload["reasoning"] = {"effort": self.reasoning_effort}
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
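# Illustrative expectations under the rules above (token/temperature values are placeholders):
#   willow-alpha-2025-11-03         -> max_output_tokens only, no "temperature" key
#   nectarine-* with effort "high"  -> also gets {"reasoning": {"effort": "high"}}, still no "temperature"
#   a model outside both lists      -> gets both max_output_tokens and "temperature"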
@@ -1151,12 +1266,84 @@ class OpenAIResponsesClient(BaseModelClient):
raise
class QwenClient(BaseModelClient):
"""
For Qwen models via Alibaba Cloud's DashScope API.
Uses OpenAI-compatible endpoint at https://dashscope-intl.aliyuncs.com/compatible-mode/v1
Model: qwen3-max
"""
def __init__(self, model_name: str = "qwen3-max", prompts_dir: Optional[str] = None):
super().__init__(model_name, prompts_dir=prompts_dir)
self.api_key = os.environ.get("DASHSCOPE_API_KEY")
if not self.api_key:
raise ValueError("DASHSCOPE_API_KEY environment variable is required for QwenClient")
self.client = AsyncOpenAI(
api_key=self.api_key,
base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
)
logger.info(f"[{self.model_name}] Initialized Qwen client via DashScope API")
async def generate_response(self, prompt: str, temperature: float = 0.0, inject_random_seed: bool = True) -> str:
"""Generate a response using Qwen via DashScope API with robust error handling."""
try:
system_prompt_content = self.system_prompt
if inject_random_seed:
random_seed = generate_random_seed()
system_prompt_content = f"{random_seed}\n\n{self.system_prompt}"
prompt_with_cta = f"{prompt}\n\nPROVIDE YOUR RESPONSE BELOW:"
response = await self.client.chat.completions.create(
model=self.model_name,
messages=[
{"role": "system", "content": system_prompt_content},
{"role": "user", "content": prompt_with_cta},
],
max_tokens=self.max_tokens,
temperature=temperature,
)
if not response or not response.choices or not response.choices[0].message.content:
raise ValueError(f"[{self.model_name}] LLM returned an empty or invalid response.")
content = response.choices[0].message.content.strip()
return content
except Exception as e:
extra = ""
try:
from openai import OpenAIError
if isinstance(e, OpenAIError):
status = getattr(e, "status_code", None)
if status:
extra += f" (status {status})"
resp = getattr(e, "response", None)
if resp is not None:
try:
body = resp.json() if hasattr(resp, "json") else resp
except Exception:
body = str(resp)
body_str = (
json.dumps(body) if isinstance(body, (dict, list)) else str(body)
)
if len(body_str) > 3_000:
body_str = body_str[:3_000] + "…[truncated]"
extra += f" body: {body_str}"
except Exception:
pass
logger.error(f"[{self.model_name}] Qwen client error: {e}{extra}", exc_info=True)
raise
class OpenRouterClient(BaseModelClient):
"""
For OpenRouter models, with default being 'openrouter/quasar-alpha'
"""
def __init__(self, model_name: str = "openrouter/quasar-alpha", prompts_dir: Optional[str] = None):
def __init__(self, model_name: str = "openrouter/quasar-alpha", prompts_dir: Optional[str] = None, reasoning_effort: Optional[str] = None):
# Allow specifying just the model identifier or the full path
if not model_name.startswith("openrouter/") and "/" not in model_name:
model_name = f"openrouter/{model_name}"
@@ -1169,8 +1356,9 @@ class OpenRouterClient(BaseModelClient):
raise ValueError("OPENROUTER_API_KEY environment variable is required")
self.client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1", api_key=self.api_key)
self.reasoning_effort = reasoning_effort # For models that support reasoning effort
logger.debug(f"[{self.model_name}] Initialized OpenRouter client")
logger.debug(f"[{self.model_name}] Initialized OpenRouter client with reasoning_effort={reasoning_effort}")
async def generate_response(self, prompt: str, temperature: float = 0.0, inject_random_seed: bool = True) -> str:
"""Generate a response using OpenRouter with robust error handling."""
@@ -1183,13 +1371,22 @@ class OpenRouterClient(BaseModelClient):
random_seed = generate_random_seed()
system_prompt_content = f"{random_seed}\n\n{self.system_prompt}"
# Prepare request parameters
request_params = {
"model": self.model_name,
"messages": [{"role": "system", "content": system_prompt_content}, {"role": "user", "content": prompt_with_cta}],
"max_tokens": self.max_tokens,
"temperature": temperature,
}
# Add reasoning parameter for models that support it (grok-4 and other reasoning models)
reasoning_models = ['grok-4', 'xai/grok-4']
if self.reasoning_effort and any(model in self.model_name for model in reasoning_models):
request_params["reasoning"] = {"effort": self.reasoning_effort}
logger.debug(f"[{self.model_name}] Adding reasoning effort: {self.reasoning_effort}")
# Prepare standard OpenAI-compatible request
response = await self.client.chat.completions.create(
model=self.model_name,
messages=[{"role": "system", "content": system_prompt_content}, {"role": "user", "content": prompt_with_cta}],
max_tokens=self.max_tokens,
temperature=temperature,
)
response = await self.client.chat.completions.create(**request_params)
if not response.choices or not response.choices[0].message.content:
raise ValueError(f"[{self.model_name}] LLM returned an empty or invalid response.")
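# Illustrative (assumed usage): OpenRouterClient("xai/grok-4", reasoning_effort="medium") would add
# request_params["reasoning"] = {"effort": "medium"} per the check above.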
@@ -1281,6 +1478,59 @@ class TogetherAIClient(BaseModelClient):
raise
##############################################################################
# GroqClient
##############################################################################
class GroqClient(BaseModelClient):
"""
Client for Groq AI models.
Model names should start with 'groq-' and include the full model path.
Example: 'groq-openai/gpt-oss-20b' -> model sent to API: 'openai/gpt-oss-20b'
"""
def __init__(self, model_name: str, prompts_dir: Optional[str] = None):
super().__init__(model_name, prompts_dir=prompts_dir)
self.api_key = os.environ.get("GROQ_API_KEY")
if not self.api_key:
raise ValueError("GROQ_API_KEY environment variable is required for GroqClient")
self.client = AsyncGroq(api_key=self.api_key)
logger.info(f"[{self.model_name}] Initialized Groq client for model: {self.model_name}")
async def generate_response(self, prompt: str, temperature: float = 0.0, inject_random_seed: bool = True) -> str:
"""
Generates a response from the Groq model.
"""
try:
system_prompt_content = self.system_prompt
if inject_random_seed:
random_seed = generate_random_seed()
system_prompt_content = f"{random_seed}\n\n{self.system_prompt}"
prompt_with_cta = prompt + "\n\nPROVIDE YOUR RESPONSE BELOW:"
response = await self.client.chat.completions.create(
model=self.model_name,
messages=[
{"role": "system", "content": system_prompt_content},
{"role": "user", "content": prompt_with_cta},
],
max_completion_tokens=self.max_tokens,
temperature=temperature,
stream=False,
)
if not response.choices or not response.choices[0].message or not response.choices[0].message.content:
raise ValueError(f"[{self.model_name}] LLM returned an empty or invalid response.")
content = response.choices[0].message.content
return content.strip()
except Exception as e:
logger.error(f"[{self.model_name}] Groq client error: {e}", exc_info=True)
raise
##############################################################################
# RequestsOpenAIClient sync requests, wrapped async (original + api_key)
##############################################################################
@@ -1352,8 +1602,8 @@ class RequestsOpenAIClient(BaseModelClient):
"temperature": temperature,
}
# Use max_completion_tokens for o4-mini, o3-mini, o3, gpt-4.1 models and nectarine models
if self.model_name in ["o4-mini", "o3-mini", "o3", "gpt-4.1"] or self.model_name.startswith("nectarine"):
# Use max_completion_tokens for o4-mini, o3-mini, o3, gpt-4.1, willow-alpha-2025-11-03 models and nectarine models
if self.model_name in ["o4-mini", "o3-mini", "o3", "gpt-4.1", "willow-alpha-2025-11-03"] or self.model_name.startswith("nectarine"):
payload["max_completion_tokens"] = self.max_tokens
else:
payload["max_tokens"] = self.max_tokens
@@ -1364,7 +1614,7 @@ class RequestsOpenAIClient(BaseModelClient):
# "allow_fallbacks": False,
# }
if (self.model_name == 'o3' or self.model_name == 'o4-mini'):
if (self.model_name == 'o3' or self.model_name == 'o4-mini' or self.model_name == 'willow-alpha-2025-11-03'):
del payload["temperature"]
if "max_tokens" in payload:
del payload["max_tokens"]
@@ -1426,8 +1676,11 @@ class Prefix(StrEnum):
ANTHROPIC = "anthropic"
GEMINI = "gemini"
DEEPSEEK = "deepseek"
KIMI = "kimi"
QWEN = "qwen"
OPENROUTER = "openrouter"
TOGETHER = "together"
GROQ = "groq"
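# Illustrative prefixed ids for the new entries (the "prefix:model" spelling is an assumption
# based on the dispatch in load_model_client below):
#   "kimi:kimi-k2-0905-preview"  -> KimiClient
#   "qwen:qwen3-max"             -> QwenClient
#   "groq:openai/gpt-oss-20b"    -> GroqClient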
def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseModelClient:
"""
@@ -1436,19 +1689,29 @@ def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseM
anthropic:claude-3.7-sonnet
openai:llama-3-2-3b@https://localhost:8000#myapikey
gpt-5-reasoning-alpha-2025-07-19:minimal
claude-sonnet-4-20250514:1m
and returns the appropriate client.
If a prefix is omitted the function falls back to the original
heuristic mapping exactly as before.
If an inline API-key ('#…') is present it overrides environment vars.
For reasoning models, effort can be specified with :minimal, :medium, or :high
For claude-sonnet-4-20250514, :1m can be specified for extended context
"""
# Special handling for claude-sonnet-4-20250514 with :1m suffix
if model_id.startswith('claude-sonnet-4-20250514'):
# This model doesn't use prefixes, just handle it directly
logger.info(f"[load_model_client] Detected claude-sonnet-4-20250514 model: {model_id}")
return ClaudeClient(model_id, prompts_dir)
# Extract reasoning effort if present (before general parsing)
reasoning_effort = None
actual_model_id = model_id
# Check if this is a reasoning model with effort specified
reasoning_models = ['gpt-5-reasoning-alpha-2025-07-19', 'o4-mini', 'nectarine-alpha-2025-07-25', 'nectarine-alpha-new-reasoning-effort-2025-07-25']
reasoning_models = ['gpt-5-reasoning-alpha-2025-07-19', 'gpt-5', 'o4-mini']
# Check for explicit reasoning models first
for model in reasoning_models:
if model_id.startswith(model + ':'):
parts = model_id.split(':', 1)
@@ -1460,6 +1723,34 @@ def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseM
reasoning_effort = effort_part.lower()
break
# Check for nectarine models
if reasoning_effort is None and model_id.startswith('nectarine'):
# Check if there's an effort specified after a colon
if ':' in model_id:
parts = model_id.split(':', 1)
effort_part = parts[1]
if effort_part.lower() in ['minimal', 'medium', 'high']:
actual_model_id = parts[0]
reasoning_effort = effort_part.lower()
# Check for OpenRouter models with reasoning (e.g., openrouter:grok-4:medium)
if reasoning_effort is None and 'openrouter' in model_id.lower():
# Count colons to determine if effort is specified
if model_id.count(':') >= 2:
# Format: openrouter:grok-4:medium or similar
parts = model_id.rsplit(':', 1)
effort_part = parts[1]
if effort_part.lower() in ['minimal', 'medium', 'high']:
actual_model_id = parts[0]
reasoning_effort = effort_part.lower()
elif model_id.count(':') == 1 and not model_id.startswith('openrouter:'):
# Format: grok-4:medium without openrouter prefix
parts = model_id.split(':', 1)
effort_part = parts[1]
if effort_part.lower() in ['minimal', 'medium', 'high']:
actual_model_id = parts[0]
reasoning_effort = effort_part.lower()
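# Worked examples of the effort-suffix parsing above (illustrative):
#   "gpt-5:high"                        -> actual_model_id="gpt-5", reasoning_effort="high"
#   "nectarine-alpha-2025-07-25:medium" -> actual_model_id="nectarine-alpha-2025-07-25", reasoning_effort="medium"
#   "openrouter:grok-4:medium"          -> actual_model_id="openrouter:grok-4", reasoning_effort="medium"
#   "o4-mini"                           -> actual_model_id="o4-mini", reasoning_effort=None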
spec = _parse_model_spec(actual_model_id)
logger.info(f"[load_model_client] Loading client for model_id='{model_id}', parsed spec: prefix={spec.prefix}, model={spec.model}, reasoning_effort={reasoning_effort}")
@@ -1476,7 +1767,7 @@ def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseM
raise ValueError(
f"[load_model_client] unknown prefix '{spec.prefix}'. "
"Allowed prefixes: openai, openai-requests, openai-responses, "
"anthropic, gemini, deepseek, openrouter, together."
"anthropic, gemini, deepseek, kimi, qwen, openrouter, together, groq."
) from exc
match pref:
@@ -1502,10 +1793,16 @@ def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseM
return GeminiClient(spec.model, prompts_dir)
case Prefix.DEEPSEEK:
return DeepSeekClient(spec.model, prompts_dir)
case Prefix.KIMI:
return KimiClient(spec.model, prompts_dir)
case Prefix.QWEN:
return QwenClient(spec.model, prompts_dir)
case Prefix.OPENROUTER:
return OpenRouterClient(spec.model, prompts_dir)
return OpenRouterClient(spec.model, prompts_dir, reasoning_effort=reasoning_effort)
case Prefix.TOGETHER:
return TogetherAIClient(spec.model, prompts_dir)
case Prefix.GROQ:
return GroqClient(spec.model, prompts_dir)
# ------------------------------------------------------------------ #
# 2. Heuristic fallback path (identical to the original behaviour) #
@@ -1514,8 +1811,8 @@ def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseM
logger.info(f"[load_model_client] Heuristic path: checking model='{spec.model}', lower_id='{lower_id}'")
# Check if this is a reasoning model that should use Responses API
reasoning_models_requiring_responses = ['gpt-5-reasoning-alpha-2025-07-19', 'o4-mini', 'nectarine-alpha-2025-07-25', 'nectarine-alpha-new-reasoning-effort-2025-07-25']
if spec.model in reasoning_models_requiring_responses:
reasoning_models_requiring_responses = ['gpt-5-reasoning-alpha-2025-07-19', 'gpt-5', 'o4-mini']
if spec.model in reasoning_models_requiring_responses or spec.model.startswith('nectarine'):
logger.info(f"[load_model_client] Selected OpenAIResponsesClient for reasoning model '{spec.model}'")
return OpenAIResponsesClient(spec.model, prompts_dir, api_key=inline_key, reasoning_effort=reasoning_effort)
@@ -1530,7 +1827,11 @@ def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseM
if "openrouter" in lower_id:
logger.info(f"[load_model_client] Selected OpenRouterClient for '{spec.model}'")
return OpenRouterClient(spec.model, prompts_dir)
return OpenRouterClient(spec.model, prompts_dir, reasoning_effort=reasoning_effort)
if "minimax" in lower_id:
logger.info(f"[load_model_client] Selected ClaudeClient for MiniMax model '{spec.model}'")
return ClaudeClient(spec.model, prompts_dir)
if "claude" in lower_id:
logger.info(f"[load_model_client] Selected ClaudeClient for '{spec.model}'")
@@ -1544,6 +1845,20 @@ def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseM
logger.info(f"[load_model_client] Selected DeepSeekClient for '{spec.model}'")
return DeepSeekClient(spec.model, prompts_dir)
if spec.model.startswith("groq-"):
# e.g. "groq-openai/gpt-oss-20b" -> strip "groq-" prefix for the actual model name
actual_model_name = spec.model[5:] # Remove "groq-" prefix
logger.info(f"[load_model_client] Selected GroqClient for '{spec.model}', using model name '{actual_model_name}'")
return GroqClient(actual_model_name, prompts_dir)
if spec.model.startswith("kimi-"):
logger.info(f"[load_model_client] Selected KimiClient for '{spec.model}'")
return KimiClient(spec.model, prompts_dir)
if "qwen" in lower_id or spec.model == "qwen3-max":
logger.info(f"[load_model_client] Selected QwenClient for '{spec.model}'")
return QwenClient(spec.model, prompts_dir)
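# Illustrative heuristic resolutions for the new providers (ids are assumptions matching the checks above):
#   "groq-openai/gpt-oss-20b" -> GroqClient("openai/gpt-oss-20b")
#   "kimi-k2-0905-preview"    -> KimiClient("kimi-k2-0905-preview")
#   "qwen3-max"               -> QwenClient("qwen3-max")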
# Default: OpenAI-compatible async client
logger.info(f"[load_model_client] No specific match found, using default OpenAIClient for '{spec.model}'")
return OpenAIClient(