fix linting

This commit is contained in:
teknium 2025-12-30 11:56:21 +00:00
parent 62fa51240c
commit 747fbc9285
4 changed files with 341 additions and 315 deletions

View file

@@ -3,7 +3,7 @@ import collections
import time
from abc import ABC, abstractmethod
from asyncio import exceptions
from dataclasses import dataclass, field
from dataclasses import dataclass
from typing import Any, Dict, Literal, Optional
import numpy as np
@@ -20,21 +20,22 @@ VALID_REASONING_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
class ReasoningConfig:
"""
Configuration for reasoning/thinking model support.
This config is used by ServerManager to automatically inject the appropriate
extra_body parameters into API requests based on the provider (OpenAI vs others).
Attributes:
enabled: Whether reasoning mode is enabled. Auto-set to True if effort or
enabled: Whether reasoning mode is enabled. Auto-set to True if effort or
max_tokens are specified.
effort: Reasoning effort level. One of: "none", "minimal", "low", "medium",
effort: Reasoning effort level. One of: "none", "minimal", "low", "medium",
"high", "xhigh". Default None (not specified).
max_tokens: Maximum tokens for reasoning (1024-32000). Default None.
"""
enabled: bool = False
effort: Optional[str] = None
max_tokens: Optional[int] = None
def __post_init__(self):
"""Validate and auto-enable if effort or max_tokens are set."""
# Validate effort if provided
@@ -43,7 +44,7 @@ class ReasoningConfig:
f"Invalid reasoning_effort: {self.effort}. "
f"Must be one of: {VALID_REASONING_EFFORTS}"
)
# Validate max_tokens range if provided
if self.max_tokens is not None:
if self.max_tokens < 1024 or self.max_tokens > 32000:
@@ -51,47 +52,49 @@ class ReasoningConfig:
f"max_reasoning_tokens must be between 1024 and 32000, "
f"got {self.max_tokens}"
)
# Auto-enable if effort or max_tokens are specified
if self.effort is not None or self.max_tokens is not None:
self.enabled = True
def is_active(self) -> bool:
    """Return True when reasoning mode has been turned on."""
    # ``enabled`` is the single source of truth here; note that
    # __post_init__ may have auto-set it when an effort level or a
    # reasoning token budget was supplied.
    active = self.enabled
    return active
def build_extra_body(self, base_url: Optional[str] = None) -> Optional[Dict[str, Any]]:
def build_extra_body(
self, base_url: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""
Build the extra_body dict for API requests based on provider.
Args:
base_url: The API base URL, used to detect OpenAI official endpoint.
Returns:
Dict to merge into extra_body, or None if reasoning not active.
Note:
OpenRouter only allows ONE of effort or max_tokens, not both.
When both are specified, effort takes priority.
"""
if not self.is_active():
return None
# Detect if using official OpenAI endpoint
is_openai_official = base_url and "api.openai.com" in base_url
if is_openai_official:
# OpenAI only accepts reasoning_effort at top level, not nested reasoning object
# They also don't support max_tokens for reasoning
effort = self.effort if self.effort else "medium"
# Map our extended effort levels to OpenAI's supported values
openai_effort_map = {
"none": "low", # OpenAI doesn't have "none", use low
"minimal": "low", # OpenAI doesn't have "minimal", use low
"none": "low", # OpenAI doesn't have "none", use low
"minimal": "low", # OpenAI doesn't have "minimal", use low
"low": "low",
"medium": "medium",
"high": "high",
"xhigh": "high", # OpenAI doesn't have "xhigh", use high
"xhigh": "high", # OpenAI doesn't have "xhigh", use high
}
return {"reasoning_effort": openai_effort_map.get(effort, "medium")}
else:
@@ -105,18 +108,18 @@ class ReasoningConfig:
# Only add max_tokens if effort is not specified
reasoning["max_tokens"] = self.max_tokens
return {"reasoning": reasoning}
@classmethod
def from_env_config(cls, env_config) -> "ReasoningConfig":
"""
Create a ReasoningConfig from a BaseEnvConfig.
This is used by BaseEnv to convert environment config settings
into the reasoning configuration used by ServerManager.
Args:
env_config: A BaseEnvConfig (or subclass) instance with reasoning fields.
Returns:
A ReasoningConfig instance configured based on the env_config.
"""
@@ -124,10 +127,14 @@ class ReasoningConfig:
thinking_mode = getattr(env_config, "thinking_mode", False)
reasoning_effort = getattr(env_config, "reasoning_effort", None)
max_reasoning_tokens = getattr(env_config, "max_reasoning_tokens", None)
# Determine if enabled: explicitly True, or implied by effort/max_tokens
enabled = thinking_mode or reasoning_effort is not None or max_reasoning_tokens is not None
enabled = (
thinking_mode
or reasoning_effort is not None
or max_reasoning_tokens is not None
)
return cls(
enabled=enabled,
effort=reasoning_effort,