[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
2026-04-19 12:57:58 +00:00 · 2025-12-30 00:26:29 +00:00 · 2025-12-30 00:26:29 +00:00 · 97047eee7b
commit 97047eee7b
parent 62fa51240c
5 changed files with 320 additions and 280 deletions
--- a/atroposlib/tests/test_reasoning_models.py
+++ b/atroposlib/tests/test_reasoning_models.py
@ -13,7 +13,7 @@ Providers tested:

 Usage:
    python -m pytest atroposlib/tests/test_reasoning_models.py -v
-    
+
    Or run directly:
    python atroposlib/tests/test_reasoning_models.py

@ -34,17 +34,16 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
 import openai

 from atroposlib.envs.server_handling.server_baseline import (
-    ReasoningConfig,
    VALID_REASONING_EFFORTS,
+    ReasoningConfig,
 )
 from environments.eval_environments.eval_helpers import (
    HERMES_REASONING_PROMPT,
    HERMES_REASONING_PROMPT_WITH_ANSWER,
-    extract_reasoning_from_response,
    extract_reasoning_from_completion,
+    extract_reasoning_from_response,
 )

-
 # =============================================================================
 # API CONFIGURATION
 # =============================================================================
@ -56,7 +55,9 @@ OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-5.2")
 OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")

 OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
-OPENROUTER_BASE_URL = os.environ.get("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
+OPENROUTER_BASE_URL = os.environ.get(
+    "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"
+)

 # Models to test on OpenRouter
 OPENROUTER_MODELS = [
@ -69,10 +70,7 @@ OPENROUTER_MODELS = [
 TEST_PROMPT = "What is 15 * 23? Think step by step before giving your answer."

 # Log file for full ChatCompletion objects
-LOG_FILE = os.path.join(
-    os.path.dirname(__file__), 
-    "reasoning_test_results.log"
-)
+LOG_FILE = os.path.join(os.path.dirname(__file__), "reasoning_test_results.log")


 def log_to_file(message: str):
@ -85,6 +83,7 @@ def log_to_file(message: str):
 # UNIT TESTS FOR ReasoningConfig
 # =============================================================================

+
 def test_reasoning_config_default():
    """Test default ReasoningConfig is not active."""
    config = ReasoningConfig()
@ -101,11 +100,11 @@ def test_reasoning_config_enabled_only():
    config = ReasoningConfig(enabled=True)
    assert config.enabled
    assert config.is_active()
-    
+
    # Test for non-OpenAI provider
    extra_body = config.build_extra_body("https://openrouter.ai/api/v1")
    assert extra_body == {"reasoning": {"enabled": True}}
-    
+
    # Test for OpenAI provider
    extra_body = config.build_extra_body("https://api.openai.com/v1")
    assert extra_body == {"reasoning_effort": "medium"}
@ -118,11 +117,11 @@ def test_reasoning_config_with_effort():
    assert config.enabled  # Should be auto-enabled
    assert config.effort == "high"
    assert config.is_active()
-    
+
    # Test for non-OpenAI provider
    extra_body = config.build_extra_body("https://openrouter.ai/api/v1")
    assert extra_body == {"reasoning": {"enabled": True, "effort": "high"}}
-    
+
    # Test for OpenAI provider
    extra_body = config.build_extra_body("https://api.openai.com/v1")
    assert extra_body == {"reasoning_effort": "high"}
@ -135,11 +134,11 @@ def test_reasoning_config_with_max_tokens():
    assert config.enabled  # Should be auto-enabled
    assert config.max_tokens == 4096
    assert config.is_active()
-    
+
    # Test for non-OpenAI provider
    extra_body = config.build_extra_body("https://openrouter.ai/api/v1")
    assert extra_body == {"reasoning": {"enabled": True, "max_tokens": 4096}}
-    
+
    # Test for OpenAI provider (max_tokens not supported, falls back to medium)
    extra_body = config.build_extra_body("https://api.openai.com/v1")
    assert extra_body == {"reasoning_effort": "medium"}
@ -152,7 +151,7 @@ def test_reasoning_config_full():
    assert config.enabled
    assert config.effort == "xhigh"
    assert config.max_tokens == 8192
-    
+
    # Test for non-OpenAI provider
    # Note: OpenRouter only allows ONE of effort or max_tokens
    # When both are set, effort takes priority
@ -164,7 +163,7 @@ def test_reasoning_config_full():
            # max_tokens is NOT included when effort is specified (OpenRouter limitation)
        }
    }
-    
+
    # Test for OpenAI provider (xhigh maps to high)
    extra_body = config.build_extra_body("https://api.openai.com/v1")
    assert extra_body == {"reasoning_effort": "high"}
@ -181,12 +180,13 @@ def test_reasoning_config_effort_mapping():
        "high": "high",
        "xhigh": "high",
    }
-    
+
    for our_effort, expected_openai in mappings.items():
        config = ReasoningConfig(effort=our_effort)
        extra_body = config.build_extra_body("https://api.openai.com/v1")
-        assert extra_body["reasoning_effort"] == expected_openai, \
-            f"Expected {our_effort} to map to {expected_openai}, got {extra_body}"
+        assert (
+            extra_body["reasoning_effort"] == expected_openai
+        ), f"Expected {our_effort} to map to {expected_openai}, got {extra_body}"
    print("✓ Effort level mapping for OpenAI works correctly")


@ -208,7 +208,7 @@ def test_reasoning_config_invalid_max_tokens():
        assert False, "Should have raised ValueError for too low"
    except ValueError as e:
        assert "must be between 1024 and 32000" in str(e)
-    
+
    # Too high
    try:
        config = ReasoningConfig(max_tokens=50000)
@ -223,7 +223,7 @@ def test_hermes_prompts_defined():
    assert HERMES_REASONING_PROMPT is not None
    assert "<think>" in HERMES_REASONING_PROMPT
    assert "</think>" in HERMES_REASONING_PROMPT
-    
+
    assert HERMES_REASONING_PROMPT_WITH_ANSWER is not None
    assert "<answer>" in HERMES_REASONING_PROMPT_WITH_ANSWER
    print("✓ Hermes prompts are properly defined")
@ -233,10 +233,11 @@ def test_hermes_prompts_defined():
 # SERVER MANAGER INTEGRATION TESTS
 # =============================================================================

+
 def test_reasoning_config_from_env_config():
    """Test ReasoningConfig.from_env_config() creates correct config."""
    from atroposlib.envs.base import BaseEnvConfig
-    
+
    # Test with thinking_mode only
    env_config = BaseEnvConfig(
        tokenizer_name="gpt2",
@ -249,7 +250,7 @@ def test_reasoning_config_from_env_config():
    assert reasoning_config.effort is None
    assert reasoning_config.max_tokens is None
    print("✓ ReasoningConfig.from_env_config with thinking_mode=True works")
-    
+
    # Test with reasoning_effort
    env_config = BaseEnvConfig(
        tokenizer_name="gpt2",
@ -261,7 +262,7 @@ def test_reasoning_config_from_env_config():
    assert reasoning_config.enabled == True  # Auto-enabled because effort is set
    assert reasoning_config.effort == "high"
    print("✓ ReasoningConfig.from_env_config with reasoning_effort works")
-    
+
    # Test with max_reasoning_tokens
    env_config = BaseEnvConfig(
        tokenizer_name="gpt2",
@ -273,7 +274,7 @@ def test_reasoning_config_from_env_config():
    assert reasoning_config.enabled == True  # Auto-enabled because max_tokens is set
    assert reasoning_config.max_tokens == 8000
    print("✓ ReasoningConfig.from_env_config with max_reasoning_tokens works")
-    
+
    # Test with all disabled (default)
    env_config = BaseEnvConfig(
        tokenizer_name="gpt2",
@ -289,9 +290,10 @@ def test_reasoning_config_from_env_config():
 def test_server_manager_builds_extra_body():
    """Test ServerManager._build_extra_body() injects correct extra_body."""
    from unittest.mock import MagicMock
-    from atroposlib.envs.server_handling.server_manager import ServerManager
+
    from atroposlib.envs.server_handling.server_baseline import APIServerConfig
-    
+    from atroposlib.envs.server_handling.server_manager import ServerManager
+
    # Create a mock server with OpenRouter base_url
    openrouter_config = APIServerConfig(
        model_name="nousresearch/hermes-4-70b",
@ -299,27 +301,27 @@ def test_server_manager_builds_extra_body():
        api_key="test-key",
        num_requests_for_eval=10,
    )
-    
+
    # Create ServerManager with reasoning config
    reasoning_config = ReasoningConfig(enabled=True, effort="high")
-    
+
    # We can't easily instantiate ServerManager without actual servers,
    # so let's test the _build_extra_body logic directly
-    
+
    # Test OpenRouter format
    extra_body = reasoning_config.build_extra_body("https://openrouter.ai/api/v1")
    assert "reasoning" in extra_body
    assert extra_body["reasoning"]["enabled"] == True
    assert extra_body["reasoning"]["effort"] == "high"
    print("✓ ServerManager builds correct extra_body for OpenRouter")
-    
+
    # Test OpenAI format
    extra_body = reasoning_config.build_extra_body("https://api.openai.com/v1")
    assert "reasoning_effort" in extra_body
    assert extra_body["reasoning_effort"] == "high"
    assert "reasoning" not in extra_body  # Should NOT have nested reasoning
    print("✓ ServerManager builds correct extra_body for OpenAI")
-    
+
    # Test Claude (anthropic) - should use max_tokens
    claude_reasoning = ReasoningConfig(enabled=True, max_tokens=8000)
    extra_body = claude_reasoning.build_extra_body("https://openrouter.ai/api/v1")
@ -332,17 +334,17 @@ def test_server_manager_builds_extra_body():
 async def test_server_manager_injects_extra_body():
    """
    Integration test: Verify ServerManager actually injects extra_body in API calls.
-    
+
    This test creates a real ServerManager and makes an actual API call to verify
    the full flow works.
    """
    if not OPENROUTER_API_KEY:
        print("⚠ Skipping ServerManager integration test - OPENROUTER_API_KEY not set")
        return True
-    
-    from atroposlib.envs.server_handling.server_manager import ServerManager
+
    from atroposlib.envs.server_handling.server_baseline import APIServerConfig
-    
+    from atroposlib.envs.server_handling.server_manager import ServerManager
+
    # Create server config for OpenRouter
    server_config = APIServerConfig(
        model_name="nousresearch/hermes-4-70b",
@ -350,14 +352,14 @@ async def test_server_manager_injects_extra_body():
        api_key=OPENROUTER_API_KEY,
        num_requests_for_eval=10,
    )
-    
+
    # Create reasoning config
    reasoning_config = ReasoningConfig(enabled=True, effort="high")
-    
-    print("\n" + "="*60)
+
+    print("\n" + "=" * 60)
    print("Testing ServerManager.chat_completion() with reasoning injection")
-    print("="*60)
-    
+    print("=" * 60)
+
    try:
        # Create ServerManager with reasoning config (NOT in testing mode - we want real API call)
        server_manager = ServerManager(
@ -365,38 +367,49 @@ async def test_server_manager_injects_extra_body():
            reasoning_config=reasoning_config,
            testing=False,  # Actually make the API call
        )
-        
+
        # Make a chat completion call
        messages = [
            {"role": "system", "content": HERMES_REASONING_PROMPT},
-            {"role": "user", "content": "What is 2 + 2? Think carefully."}
+            {"role": "user", "content": "What is 2 + 2? Think carefully."},
        ]
-        
-        print(f"Making API call with reasoning config: enabled={reasoning_config.enabled}, effort={reasoning_config.effort}")
-        
+
+        print(
+            f"Making API call with reasoning config: enabled={reasoning_config.enabled}, effort={reasoning_config.effort}"
+        )
+
        completion = await server_manager.chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
        )
-        
+
        # Verify response has reasoning
        reasoning, source, content = extract_reasoning_from_completion(completion)
-        
+
        print(f"Response received!")
-        print(f"Content: {content[:100]}..." if content and len(content) > 100 else f"Content: {content}")
+        print(
+            f"Content: {content[:100]}..."
+            if content and len(content) > 100
+            else f"Content: {content}"
+        )
        print(f"Reasoning source: {source}")
        print(f"Reasoning length: {len(reasoning) if reasoning else 0} chars")
-        
+
        if reasoning:
-            print("✓ ServerManager.chat_completion() correctly injected reasoning extra_body")
+            print(
+                "✓ ServerManager.chat_completion() correctly injected reasoning extra_body"
+            )
            return True
        else:
-            print("⚠ Response received but no reasoning found (model may not support it)")
+            print(
+                "⚠ Response received but no reasoning found (model may not support it)"
+            )
            return True  # Still a pass - the injection worked, model just didn't return reasoning
-            
+
    except Exception as e:
        import traceback
+
        print(f"✗ ServerManager test failed: {e}")
        traceback.print_exc()
        return False
@ -405,18 +418,18 @@ async def test_server_manager_injects_extra_body():
 def test_full_env_config_to_server_flow():
    """
    Test the complete flow from BaseEnvConfig to ServerManager reasoning injection.
-    
+
    This verifies that:
    1. BaseEnvConfig with reasoning fields creates properly
    2. ReasoningConfig.from_env_config() works
    3. The resulting config would inject correct extra_body
    """
    from atroposlib.envs.base import BaseEnvConfig
-    
-    print("\n" + "="*60)
+
+    print("\n" + "=" * 60)
    print("Testing full BaseEnvConfig → ServerManager flow")
-    print("="*60)
-    
+    print("=" * 60)
+
    # Create a config like a user would
    env_config = BaseEnvConfig(
        tokenizer_name="gpt2",
@ -426,20 +439,20 @@ def test_full_env_config_to_server_flow():
        reasoning_effort="high",
        max_reasoning_tokens=8000,
    )
-    
+
    print(f"Created BaseEnvConfig:")
    print(f"  thinking_mode: {env_config.thinking_mode}")
    print(f"  reasoning_effort: {env_config.reasoning_effort}")
    print(f"  max_reasoning_tokens: {env_config.max_reasoning_tokens}")
-    
+
    # Convert to ReasoningConfig (this happens in BaseEnv.__init__)
    reasoning_config = ReasoningConfig.from_env_config(env_config)
-    
+
    print(f"\nReasoningConfig created:")
    print(f"  enabled: {reasoning_config.enabled}")
    print(f"  effort: {reasoning_config.effort}")
    print(f"  max_tokens: {reasoning_config.max_tokens}")
-    
+
    # Verify the config would generate correct extra_body
    # For OpenRouter
    openrouter_extra = reasoning_config.build_extra_body("https://openrouter.ai/api/v1")
@ -447,12 +460,12 @@ def test_full_env_config_to_server_flow():
    assert openrouter_extra["reasoning"]["enabled"] == True
    assert openrouter_extra["reasoning"]["effort"] == "high"
    # Note: max_tokens is NOT included when effort is set (OpenRouter limitation)
-    
+
    # For OpenAI
    openai_extra = reasoning_config.build_extra_body("https://api.openai.com/v1")
    print(f"\nOpenAI extra_body: {json.dumps(openai_extra, indent=2)}")
    assert openai_extra["reasoning_effort"] == "high"
-    
+
    print("\n✓ Full BaseEnvConfig → ServerManager flow works correctly!")
    return True

@ -461,54 +474,49 @@ def test_full_env_config_to_server_flow():
 # INTEGRATION TESTS WITH REAL API CALLS
 # =============================================================================

+
 async def test_openrouter_reasoning(model: str, effort: str = "high"):
    """
    Test reasoning with an OpenRouter model.
-    
+
    Args:
        model: Model name to test
        effort: Reasoning effort level
-    
+
    Returns:
        Dict with test results
    """
    print(f"\n{'='*60}")
    print(f"Testing OpenRouter: {model}")
    print(f"{'='*60}")
-    
+
    client = openai.AsyncOpenAI(
        api_key=OPENROUTER_API_KEY,
        base_url=OPENROUTER_BASE_URL,
    )
-    
+
    # Build extra_body based on model type
    # Claude models need max_tokens in reasoning dict, not effort
    # See: https://openrouter.ai/docs/guides/best-practices/reasoning-tokens
    is_claude = "claude" in model.lower() or "anthropic" in model.lower()
-    
+
    if is_claude:
        # Claude needs reasoning.max_tokens, and overall max_tokens must be higher
        reasoning_max_tokens = 8000
        overall_max_tokens = 0  # Must be > reasoning_max_tokens
-        extra_body = {
-            "reasoning": {
-                "max_tokens": reasoning_max_tokens
-            }
-        }
+        extra_body = {"reasoning": {"max_tokens": reasoning_max_tokens}}
    else:
        # Other models use effort
        config = ReasoningConfig(enabled=True, effort=effort)
        extra_body = config.build_extra_body(OPENROUTER_BASE_URL)
        overall_max_tokens = 0
-    
-    messages = [
-        {"role": "user", "content": TEST_PROMPT}
-    ]
-    
+
+    messages = [{"role": "user", "content": TEST_PROMPT}]
+
    # For Hermes, also add the system prompt
    if "hermes" in model.lower():
        messages.insert(0, {"role": "system", "content": HERMES_REASONING_PROMPT})
-    
+
    # Build the full request for logging
    request_params = {
        "model": model,
@ -517,12 +525,12 @@ async def test_openrouter_reasoning(model: str, effort: str = "high"):
        "temperature": 0.7,
        "extra_body": extra_body,
    }
-    
+
    print(f"Request params:")
    print(f"  model: {model}")
    print(f"  max_tokens: {overall_max_tokens}")
    print(f"  extra_body: {json.dumps(extra_body, indent=2)}")
-    
+
    # Log the full request to file
    log_to_file(f"\n{'='*70}")
    log_to_file(f"MODEL: {model}")
@ -530,7 +538,7 @@ async def test_openrouter_reasoning(model: str, effort: str = "high"):
    log_to_file(f"{'='*70}")
    log_to_file(f"\nREQUEST SENT:")
    log_to_file(json.dumps(request_params, indent=2, default=str))
-    
+
    try:
        completion = await client.chat.completions.create(
            model=model,
@ -539,45 +547,44 @@ async def test_openrouter_reasoning(model: str, effort: str = "high"):
            temperature=0.7,
            extra_body=extra_body,
        )
-        
+
        # Log full ChatCompletion object to file for inspection
        log_to_file(f"\nRESPONSE RECEIVED:")
        log_to_file(f"\nFULL CHATCOMPLETION OBJECT:")
        log_to_file(str(completion))
        log_to_file(f"\n{'='*40}")
-        
+
        # Also log the choice and message separately for clarity
        choice = completion.choices[0]
        log_to_file(f"\nChoice object: {choice}")
        log_to_file(f"\nMessage object: {choice.message}")
-        
+
        # Log all attributes on the message
        log_to_file(f"\nMessage attributes: {dir(choice.message)}")
-        
+
        # Try to get model_dump if available (pydantic)
        if hasattr(completion, "model_dump"):
            log_to_file(f"\nCompletion model_dump():")
            log_to_file(json.dumps(completion.model_dump(), indent=2, default=str))
-        
+
        if hasattr(choice, "model_dump"):
            log_to_file(f"\nChoice model_dump():")
            log_to_file(json.dumps(choice.model_dump(), indent=2, default=str))
-            
+
        if hasattr(choice.message, "model_dump"):
            log_to_file(f"\nMessage model_dump():")
            log_to_file(json.dumps(choice.message.model_dump(), indent=2, default=str))
-        
+
        # Get the response content
        content = completion.choices[0].message.content
        log_to_file(f"\nResponse content ({len(content) if content else 0} chars):")
        log_to_file(content if content else "(empty)")
-        
+
        # Try to extract reasoning
        reasoning, source = extract_reasoning_from_response(
-            completion.choices[0], 
-            content=content
+            completion.choices[0], content=content
        )
-        
+
        log_to_file(f"\nReasoning extraction result:")
        log_to_file(f"  Source: {source}")
        if reasoning:
@ -586,32 +593,41 @@ async def test_openrouter_reasoning(model: str, effort: str = "high"):
            log_to_file(reasoning)
        else:
            log_to_file("  No separate reasoning found")
-        
+
        # Check for <think> blocks in content
        has_think_block = "<think>" in content.lower() if content else False
        log_to_file(f"  Has <think> block in content: {has_think_block}")
-        
+
        # Check for reasoning_details in raw response
        has_reasoning_details = hasattr(completion.choices[0], "reasoning_details")
-        has_reasoning_content = hasattr(completion.choices[0].message, "reasoning_content")
+        has_reasoning_content = hasattr(
+            completion.choices[0].message, "reasoning_content"
+        )
        log_to_file(f"  Has reasoning_details attr: {has_reasoning_details}")
        log_to_file(f"  Has reasoning_content attr: {has_reasoning_content}")
-        
+
        # Try to access reasoning fields directly if they exist
-        if hasattr(choice.message, "reasoning_content") and choice.message.reasoning_content:
-            log_to_file(f"  message.reasoning_content: {choice.message.reasoning_content}")
+        if (
+            hasattr(choice.message, "reasoning_content")
+            and choice.message.reasoning_content
+        ):
+            log_to_file(
+                f"  message.reasoning_content: {choice.message.reasoning_content}"
+            )
        if hasattr(choice.message, "reasoning") and choice.message.reasoning:
            log_to_file(f"  message.reasoning: {choice.message.reasoning}")
        if hasattr(choice, "reasoning_details") and choice.reasoning_details:
            log_to_file(f"  choice.reasoning_details: {choice.reasoning_details}")
-        
+
        log_to_file(f"\n{'='*70}\n")
-        
+
        # Also print summary to console
        print(f"\nResponse content ({len(content) if content else 0} chars)")
-        print(f"Reasoning source: {source}, length: {len(reasoning) if reasoning else 0} chars")
+        print(
+            f"Reasoning source: {source}, length: {len(reasoning) if reasoning else 0} chars"
+        )
        print(f"(Full details logged to {LOG_FILE})")
-        
+
        return {
            "model": model,
            "success": True,
@ -620,7 +636,7 @@ async def test_openrouter_reasoning(model: str, effort: str = "high"):
            "reasoning_length": len(reasoning) if reasoning else 0,
            "has_think_block": has_think_block,
        }
-        
+
    except Exception as e:
        print(f"Error: {e}")
        return {
@ -633,30 +649,28 @@ async def test_openrouter_reasoning(model: str, effort: str = "high"):
 async def test_openai_reasoning(effort: str = "medium"):
    """
    Test reasoning with OpenAI official API.
-    
+
    Args:
        effort: Reasoning effort level
-    
+
    Returns:
        Dict with test results
    """
    print(f"\n{'='*60}")
    print(f"Testing OpenAI: {OPENAI_MODEL}")
    print(f"{'='*60}")
-    
+
    client = openai.AsyncOpenAI(
        api_key=OPENAI_API_KEY,
        base_url=OPENAI_BASE_URL,
    )
-    
+
    # Build extra_body using our ReasoningConfig
    config = ReasoningConfig(enabled=True, effort=effort)
    extra_body = config.build_extra_body(OPENAI_BASE_URL)
-    
-    messages = [
-        {"role": "user", "content": TEST_PROMPT}
-    ]
-    
+
+    messages = [{"role": "user", "content": TEST_PROMPT}]
+
    # Build the full request for logging
    request_params = {
        "model": OPENAI_MODEL,
@ -664,12 +678,12 @@ async def test_openai_reasoning(effort: str = "medium"):
        "max_completion_tokens": 1024,
        "extra_body": extra_body,
    }
-    
+
    print(f"Request params:")
    print(f"  model: {OPENAI_MODEL}")
    print(f"  max_completion_tokens: 1024")
    print(f"  extra_body: {json.dumps(extra_body, indent=2)}")
-    
+
    # Log the full request to file
    log_to_file(f"\n{'='*70}")
    log_to_file(f"MODEL: {OPENAI_MODEL} (OpenAI)")
@ -677,7 +691,7 @@ async def test_openai_reasoning(effort: str = "medium"):
    log_to_file(f"{'='*70}")
    log_to_file(f"\nREQUEST SENT:")
    log_to_file(json.dumps(request_params, indent=2, default=str))
-    
+
    try:
        completion = await client.chat.completions.create(
            model=OPENAI_MODEL,
@ -686,45 +700,44 @@ async def test_openai_reasoning(effort: str = "medium"):
            # Note: OpenAI reasoning models only support temperature=1 (default)
            extra_body=extra_body,
        )
-        
+
        # Log full ChatCompletion object to file for inspection
        log_to_file(f"\nRESPONSE RECEIVED:")
        log_to_file(f"\nFULL CHATCOMPLETION OBJECT:")
        log_to_file(str(completion))
        log_to_file(f"\n{'='*40}")
-        
+
        # Also log the choice and message separately for clarity
        choice = completion.choices[0]
        log_to_file(f"\nChoice object: {choice}")
        log_to_file(f"\nMessage object: {choice.message}")
-        
+
        # Log all attributes on the message
        log_to_file(f"\nMessage attributes: {dir(choice.message)}")
-        
+
        # Try to get model_dump if available (pydantic)
        if hasattr(completion, "model_dump"):
            log_to_file(f"\nCompletion model_dump():")
            log_to_file(json.dumps(completion.model_dump(), indent=2, default=str))
-        
+
        if hasattr(choice, "model_dump"):
            log_to_file(f"\nChoice model_dump():")
            log_to_file(json.dumps(choice.model_dump(), indent=2, default=str))
-            
+
        if hasattr(choice.message, "model_dump"):
            log_to_file(f"\nMessage model_dump():")
            log_to_file(json.dumps(choice.message.model_dump(), indent=2, default=str))
-        
+
        # Get the response content
        content = completion.choices[0].message.content
        log_to_file(f"\nResponse content ({len(content) if content else 0} chars):")
        log_to_file(content if content else "(empty)")
-        
+
        # Try to extract reasoning
        reasoning, source = extract_reasoning_from_response(
-            completion.choices[0], 
-            content=content
+            completion.choices[0], content=content
        )
-        
+
        log_to_file(f"\nReasoning extraction result:")
        log_to_file(f"  Source: {source}")
        if reasoning:
@ -733,26 +746,33 @@ async def test_openai_reasoning(effort: str = "medium"):
            log_to_file(reasoning)
        else:
            log_to_file("  No separate reasoning found")
-        
+
        # Check for <think> blocks in content (unlikely for OpenAI)
        has_think_block = "<think>" in content.lower() if content else False
        log_to_file(f"  Has <think> block in content: {has_think_block}")
-        
+
        # Try to access reasoning fields directly if they exist
-        if hasattr(choice.message, "reasoning_content") and choice.message.reasoning_content:
-            log_to_file(f"  message.reasoning_content: {choice.message.reasoning_content}")
+        if (
+            hasattr(choice.message, "reasoning_content")
+            and choice.message.reasoning_content
+        ):
+            log_to_file(
+                f"  message.reasoning_content: {choice.message.reasoning_content}"
+            )
        if hasattr(choice.message, "reasoning") and choice.message.reasoning:
            log_to_file(f"  message.reasoning: {choice.message.reasoning}")
        if hasattr(choice, "reasoning_details") and choice.reasoning_details:
            log_to_file(f"  choice.reasoning_details: {choice.reasoning_details}")
-        
+
        log_to_file(f"\n{'='*70}\n")
-        
+
        # Also print summary to console
        print(f"\nResponse content ({len(content) if content else 0} chars)")
-        print(f"Reasoning source: {source}, length: {len(reasoning) if reasoning else 0} chars")
+        print(
+            f"Reasoning source: {source}, length: {len(reasoning) if reasoning else 0} chars"
+        )
        print(f"(Full details logged to {LOG_FILE})")
-        
+
        return {
            "model": OPENAI_MODEL,
            "success": True,
@ -761,7 +781,7 @@ async def test_openai_reasoning(effort: str = "medium"):
            "reasoning_length": len(reasoning) if reasoning else 0,
            "has_think_block": has_think_block,
        }
-        
+
    except Exception as e:
        print(f"Error: {e}")
        return {
@ -773,60 +793,62 @@ async def test_openai_reasoning(effort: str = "medium"):

 async def run_all_integration_tests():
    """Run all integration tests and summarize results."""
-    print("\n" + "="*70)
+    print("\n" + "=" * 70)
    print("REASONING MODEL INTEGRATION TESTS")
-    print("="*70)
-    
+    print("=" * 70)
+
    # Check that API keys are set
    missing_keys = []
    if not OPENAI_API_KEY:
        missing_keys.append("OPENAI_API_KEY")
    if not OPENROUTER_API_KEY:
        missing_keys.append("OPENROUTER_API_KEY")
-    
+
    if missing_keys:
        print(f"\n⚠ Missing required environment variables: {', '.join(missing_keys)}")
        print("Set them before running integration tests:")
        print("  export OPENAI_API_KEY='your-key-here'")
        print("  export OPENROUTER_API_KEY='your-key-here'")
        return False
-    
+
    # Initialize log file
    with open(LOG_FILE, "w") as f:
        f.write(f"REASONING MODEL TEST RESULTS\n")
        f.write(f"Generated: {datetime.now().isoformat()}\n")
        f.write(f"{'='*70}\n\n")
-    
+
    print(f"\nLogging full ChatCompletion objects to: {LOG_FILE}\n")
-    
+
    results = []
-    
+
    # Test OpenRouter models
    for model in OPENROUTER_MODELS:
        result = await test_openrouter_reasoning(model)
        results.append(result)
-    
+
    # Test OpenAI
    result = await test_openai_reasoning()
    results.append(result)
-    
+
    # Summary
-    print("\n" + "="*70)
+    print("\n" + "=" * 70)
    print("TEST SUMMARY")
-    print("="*70)
-    
+    print("=" * 70)
+
    for result in results:
        status = "✓ PASS" if result.get("success") else "✗ FAIL"
        model = result.get("model", "unknown")
-        
+
        if result.get("success"):
            reasoning_info = f"reasoning: {result.get('reasoning_source', 'none')}"
            if result.get("reasoning_length", 0) > 0:
                reasoning_info += f" ({result['reasoning_length']} chars)"
            print(f"{status} | {model:40} | {reasoning_info}")
        else:
-            print(f"{status} | {model:40} | error: {result.get('error', 'unknown')[:30]}")
-    
+            print(
+                f"{status} | {model:40} | error: {result.get('error', 'unknown')[:30]}"
+            )
+
    # Check if any failed
    failures = [r for r in results if not r.get("success")]
    if failures:
@ -841,12 +863,13 @@ async def run_all_integration_tests():
 # MAIN
 # =============================================================================

+
 def run_unit_tests():
    """Run all unit tests (no API calls)."""
-    print("\n" + "="*70)
+    print("\n" + "=" * 70)
    print("UNIT TESTS")
-    print("="*70 + "\n")
-    
+    print("=" * 70 + "\n")
+
    # ReasoningConfig unit tests
    test_reasoning_config_default()
    test_reasoning_config_enabled_only()
@ -857,54 +880,52 @@ def run_unit_tests():
    test_reasoning_config_invalid_effort()
    test_reasoning_config_invalid_max_tokens()
    test_hermes_prompts_defined()
-    
+
    # ServerManager integration tests (no API calls)
    test_reasoning_config_from_env_config()
    test_server_manager_builds_extra_body()
    test_full_env_config_to_server_flow()
-    
-    print("\n" + "="*70)
+
+    print("\n" + "=" * 70)
    print("All unit tests passed!")
-    print("="*70)
+    print("=" * 70)


 async def run_server_manager_integration_test():
    """Run ServerManager integration test with real API call."""
-    print("\n" + "="*70)
+    print("\n" + "=" * 70)
    print("SERVER MANAGER INTEGRATION TEST")
-    print("="*70)
-    
+    print("=" * 70)
+
    result = await test_server_manager_injects_extra_body()
-    
+
    if result:
        print("\n✓ ServerManager integration test passed!")
    else:
        print("\n✗ ServerManager integration test failed!")
-    
+
    return result


 if __name__ == "__main__":
    import argparse
-    
+
    parser = argparse.ArgumentParser(description="Test reasoning model support")
    parser.add_argument(
-        "--unit-only", 
-        action="store_true",
-        help="Only run unit tests (no API calls)"
+        "--unit-only", action="store_true", help="Only run unit tests (no API calls)"
    )
    parser.add_argument(
        "--integration-only",
-        action="store_true", 
-        help="Only run integration tests (API calls to all providers)"
+        action="store_true",
+        help="Only run integration tests (API calls to all providers)",
    )
    parser.add_argument(
        "--server-manager-only",
        action="store_true",
-        help="Only run ServerManager integration test (single API call)"
+        help="Only run ServerManager integration test (single API call)",
    )
    args = parser.parse_args()
-    
+
    if args.integration_only:
        asyncio.run(run_all_integration_tests())
    elif args.unit_only:
@ -917,4 +938,3 @@ if __name__ == "__main__":
        run_unit_tests()
        asyncio.run(run_server_manager_integration_test())
        asyncio.run(run_all_integration_tests())
-