resolving issues raised in pr

This commit is contained in:
sam-paech 2025-07-04 08:38:57 +10:00
parent ad817695ef
commit 73a3c0596a
7 changed files with 50 additions and 41 deletions

View file

@@ -281,14 +281,12 @@ python lm_game.py --run_dir results/game_run_005 --prompts_dir ./prompts/my_vari
* **Model-ID syntax** * **Model-ID syntax**
``` ```
<prefix:>model[@base_url][#api_key] <client prefix:>model[@base_url][#api_key]
``` ```
* `prefix:` optional client (`openai`, `requests`, `claude`, `together`, …). * `prefix:` specify the client (`openai`, `openai-requests`, `openai-responses`, `anthropic`, `gemini`, `deepseek`, `openrouter`, `together`).
* `@base_url` hit a proxy / alt endpoint. * `@base_url` hit a proxy / alt endpoint.
* `#api_key` inline key (overrides env vars). * `#api_key` inline key (overrides env vars).
* **Examples**
```bash ```bash
# gpt-4o on openrouter for all powers: # gpt-4o on openrouter for all powers:
--models "openrouter:gpt-4o" --models "openrouter:gpt-4o"

View file

@@ -7,7 +7,7 @@ import json_repair
import json5 # More forgiving JSON parser import json5 # More forgiving JSON parser
import ast import ast
from ..config import config from config import config
# Assuming BaseModelClient is importable from clients.py in the same directory # Assuming BaseModelClient is importable from clients.py in the same directory
from .clients import BaseModelClient from .clients import BaseModelClient
@@ -543,7 +543,6 @@ class DiplomacyAgent:
raw_response = await run_llm_and_log( raw_response = await run_llm_and_log(
client=self.client, client=self.client,
prompt=full_prompt, prompt=full_prompt,
log_file_path=log_file_path, # Pass the main log file path
power_name=self.power_name, power_name=self.power_name,
phase=game.current_short_phase, phase=game.current_short_phase,
response_type="negotiation_diary_raw", # For run_llm_and_log context response_type="negotiation_diary_raw", # For run_llm_and_log context
@@ -726,8 +725,7 @@ class DiplomacyAgent:
try: try:
raw_response = await run_llm_and_log( raw_response = await run_llm_and_log(
client=self.client, client=self.client,
prompt=prompt, prompt=prompt,
log_file_path=log_file_path,
power_name=self.power_name, power_name=self.power_name,
phase=game.current_short_phase, phase=game.current_short_phase,
response_type="order_diary", response_type="order_diary",
@@ -871,7 +869,6 @@ class DiplomacyAgent:
raw_response = await run_llm_and_log( raw_response = await run_llm_and_log(
client=self.client, client=self.client,
prompt=prompt, prompt=prompt,
log_file_path=log_file_path,
power_name=self.power_name, power_name=self.power_name,
phase=game.current_short_phase, phase=game.current_short_phase,
response_type="phase_result_diary", response_type="phase_result_diary",
@@ -992,7 +989,6 @@ class DiplomacyAgent:
response = await run_llm_and_log( response = await run_llm_and_log(
client=self.client, client=self.client,
prompt=prompt, prompt=prompt,
log_file_path=log_file_path,
power_name=power_name, power_name=power_name,
phase=current_phase, phase=current_phase,
response_type="state_update", response_type="state_update",

View file

@@ -14,6 +14,7 @@ from openai import AsyncOpenAI as AsyncDeepSeekOpenAI # Alias for clarity
from anthropic import AsyncAnthropic from anthropic import AsyncAnthropic
import asyncio import asyncio
import requests import requests
from enum import StrEnum
import google.generativeai as genai import google.generativeai as genai
from together import AsyncTogether from together import AsyncTogether
@@ -115,7 +116,6 @@ class BaseModelClient:
raw_response = await run_llm_and_log( raw_response = await run_llm_and_log(
client=self, client=self,
prompt=prompt, prompt=prompt,
log_file_path=log_file_path,
power_name=power_name, power_name=power_name,
phase=phase, phase=phase,
response_type="order", # Context for run_llm_and_log's own error logging response_type="order", # Context for run_llm_and_log's own error logging
@@ -515,7 +515,6 @@ class BaseModelClient:
raw_response = await run_llm_and_log( raw_response = await run_llm_and_log(
client=self, client=self,
prompt=prompt, prompt=prompt,
log_file_path=log_file_path,
power_name=power_name, power_name=power_name,
phase=game_phase, # Use game_phase for logging phase=game_phase, # Use game_phase for logging
response_type="plan_reply", # Changed from 'plan' to avoid confusion response_type="plan_reply", # Changed from 'plan' to avoid confusion
@@ -562,7 +561,6 @@ class BaseModelClient:
raw_response = await run_llm_and_log( raw_response = await run_llm_and_log(
client=self, client=self,
prompt=raw_input_prompt, prompt=raw_input_prompt,
log_file_path=log_file_path,
power_name=power_name, power_name=power_name,
phase=game_phase, phase=game_phase,
response_type="negotiation", # For run_llm_and_log's internal context response_type="negotiation", # For run_llm_and_log's internal context
@@ -737,7 +735,6 @@ class BaseModelClient:
raw_plan_response = await run_llm_and_log( raw_plan_response = await run_llm_and_log(
client=self, # Pass self (the client instance) client=self, # Pass self (the client instance)
prompt=full_prompt, prompt=full_prompt,
log_file_path=log_file_path,
power_name=power_name, power_name=power_name,
phase=game.current_short_phase, phase=game.current_short_phase,
response_type="plan_generation", # More specific type for run_llm_and_log context response_type="plan_generation", # More specific type for run_llm_and_log context
@@ -1261,17 +1258,22 @@ def _parse_model_spec(raw: str) -> ModelSpec:
return ModelSpec(prefix, model, base_part or None, key_part or None) return ModelSpec(prefix, model, base_part or None, key_part or None)
class Prefix(StrEnum):
OPENAI = "openai"
OPENAI_REQUESTS = "openai-requests"
OPENAI_RESPONSES = "openai-responses"
ANTHROPIC = "anthropic"
GEMINI = "gemini"
DEEPSEEK = "deepseek"
OPENROUTER = "openrouter"
TOGETHER = "together"
##############################################################################
# Factory load_model_client
##############################################################################
def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseModelClient: def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseModelClient:
""" """
Recognises strings like Recognises strings like
gpt-4o gpt-4o
gpt-4o@https://proxy anthropic:claude-3.7-sonnet
gpt-4o#sk-123 openai:llama-3-2-3b@https://localhost:8000#myapikey
openai:gpt-4o@https://proxy#sk-ABC
and returns the appropriate client. and returns the appropriate client.
If a prefix is omitted the function falls back to the original If a prefix is omitted the function falls back to the original
@@ -1287,35 +1289,42 @@ def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseM
# 1. Explicit prefix path # # 1. Explicit prefix path #
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
if spec.prefix: if spec.prefix:
match spec.prefix: try:
case "openai" | "oai": pref = Prefix(spec.prefix.lower())
except ValueError as exc:
raise ValueError(
f"[load_model_client] unknown prefix '{spec.prefix}'. "
"Allowed prefixes: openai, openai-requests, openai-responses, "
"anthropic, gemini, deepseek, openrouter, together."
) from exc
match pref:
case Prefix.OPENAI:
return OpenAIClient( return OpenAIClient(
model_name=spec.model, model_name=spec.model,
prompts_dir=prompts_dir, prompts_dir=prompts_dir,
base_url=spec.base, base_url=spec.base,
api_key=inline_key, api_key=inline_key,
) )
case "requests" | "req": case Prefix.OPENAI_REQUESTS:
return RequestsOpenAIClient( return RequestsOpenAIClient(
model_name=spec.model, model_name=spec.model,
prompts_dir=prompts_dir, prompts_dir=prompts_dir,
base_url=spec.base, base_url=spec.base,
api_key=inline_key, api_key=inline_key,
) )
case "responses" | "oai-resp" | "openai-responses": case Prefix.OPENAI_RESPONSES:
return OpenAIResponsesClient(spec.model, prompts_dir, api_key=inline_key) return OpenAIResponsesClient(spec.model, prompts_dir, api_key=inline_key)
case "claude": case Prefix.ANTHROPIC:
return ClaudeClient(spec.model, prompts_dir) return ClaudeClient(spec.model, prompts_dir)
case "gemini": case Prefix.GEMINI:
return GeminiClient(spec.model, prompts_dir) return GeminiClient(spec.model, prompts_dir)
case "deepseek": case Prefix.DEEPSEEK:
return DeepSeekClient(spec.model, prompts_dir) return DeepSeekClient(spec.model, prompts_dir)
case "openrouter" | "or": case Prefix.OPENROUTER:
return OpenRouterClient(spec.model, prompts_dir) return OpenRouterClient(spec.model, prompts_dir)
case "together": case Prefix.TOGETHER:
return TogetherAIClient(spec.model, prompts_dir) return TogetherAIClient(spec.model, prompts_dir)
case _:
logger.warning(f"[load_model_client] Unknown prefix '{spec.prefix}', falling back to heuristic path.")
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# 2. Heuristic fallback path (identical to the original behaviour) # # 2. Heuristic fallback path (identical to the original behaviour) #
@@ -1349,8 +1358,6 @@ def load_model_client(model_id: str, prompts_dir: Optional[str] = None) -> BaseM
api_key=inline_key, api_key=inline_key,
) )
return OpenAIClient(model_name, prompts_dir, base_url)
############################################################################## ##############################################################################
# 1) Add a method to filter visible messages (near top-level or in BaseModelClient) # 1) Add a method to filter visible messages (near top-level or in BaseModelClient)

View file

@@ -94,7 +94,6 @@ async def run_diary_consolidation(
raw_response = await run_llm_and_log( raw_response = await run_llm_and_log(
client=consolidation_client, client=consolidation_client,
prompt=prompt, prompt=prompt,
log_file_path=log_file_path,
power_name=agent.power_name, power_name=agent.power_name,
phase=game.current_short_phase, phase=game.current_short_phase,
response_type="diary_consolidation", response_type="diary_consolidation",

View file

@@ -72,7 +72,6 @@ async def initialize_agent_state_ext(
response = await run_llm_and_log( response = await run_llm_and_log(
client=agent.client, client=agent.client,
prompt=full_prompt, prompt=full_prompt,
log_file_path=log_file_path,
power_name=power_name, power_name=power_name,
phase=current_phase, phase=current_phase,
response_type="initialization", # Context for run_llm_and_log internal error logging response_type="initialization", # Context for run_llm_and_log internal error logging

View file

@@ -10,7 +10,7 @@ import string
import json import json
import asyncio import asyncio
from ..config import config from config import config
# Avoid circular import for type hinting # Avoid circular import for type hinting
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -398,10 +398,9 @@ def log_llm_response(
async def run_llm_and_log( async def run_llm_and_log(
client: "BaseModelClient", client: "BaseModelClient",
prompt: str, prompt: str,
log_file_path: str, # Kept for context, but not used for logging here power_name: Optional[str],
power_name: Optional[str], # Kept for context, but not used for logging here phase: str,
phase: str, # Kept for context, but not used for logging here response_type: str,
response_type: str, # Kept for context, but not used for logging here
temperature: float = 0.0, temperature: float = 0.0,
*, *,
attempts: int = 5, attempts: int = 5,

View file

@@ -29,7 +29,18 @@ from collections import defaultdict, Counter
import re import re
from typing import Dict, List, Tuple, Optional, Any from typing import Dict, List, Tuple, Optional, Any
import statistics import statistics
from ..models import PowerEnum try:
# Absolute import works when the project root is on sys.path
from models import PowerEnum
except ImportError:
try:
# Relative import works when file is executed as part of the ai_diplomacy package
from ..models import PowerEnum
except ImportError as exc:
# Re-raise with context so the root cause is visible
raise ImportError(
"models.PowerEnum not found via absolute or relative import. "
) from exc
class StatisticalGameAnalyzer: class StatisticalGameAnalyzer:
"""Production-ready analyzer for AI Diplomacy game statistics. """Production-ready analyzer for AI Diplomacy game statistics.