Add reasoning configuration support across server implementations

- Updated server classes (OpenAIServer, SGLangServer, TrlVllmServer, VLLMServer) to accept a ReasoningConfig parameter during initialization.
- Enhanced ReasoningConfig to allow flexible max_tokens without strict validation, accommodating varying provider limits.
- Implemented reasoning configuration injection in APIServer methods for chat and completion handling.
- Updated tests to reflect changes in max_tokens validation logic.

This commit integrates reasoning capabilities into the server handling architecture, improving compatibility with diverse reasoning models.
This commit is contained in:
teknium 2026-01-05 23:20:01 +00:00
parent 6763649c3a
commit e1ece3e64e
7 changed files with 190 additions and 116 deletions

View file

@@ -12,7 +12,11 @@ from pydantic_cli import FailedExecutionException
from transformers import AutoTokenizer
from atroposlib.envs.constants import NAMESPACE_SEP, OPENAI_NAMESPACE
from atroposlib.envs.server_handling.server_baseline import APIServer, APIServerConfig
from atroposlib.envs.server_handling.server_baseline import (
APIServer,
APIServerConfig,
ReasoningConfig,
)
class VLLMServer(APIServer):
@@ -20,14 +24,18 @@ class VLLMServer(APIServer):
VLLM server handling.
"""
def __init__(
    self,
    config: APIServerConfig,
    reasoning_config: ReasoningConfig = None,
):
    """Initialize the VLLM server handler.

    Args:
        config: API server settings; ``api_key``, ``base_url`` and
            ``timeout`` configure the client, ``model_name`` selects the
            tokenizer.
        reasoning_config: Optional reasoning configuration forwarded to the
            ``APIServer`` base class. Defaults to ``None`` (no reasoning
            configuration). NOTE(review): annotation is implicitly Optional
            (``ReasoningConfig = None``); consider ``Optional[ReasoningConfig]``
            file-wide.
    """
    # Async OpenAI-compatible client pointed at the VLLM endpoint.
    self.openai = openai.AsyncClient(
        api_key=config.api_key,
        base_url=config.base_url,
        timeout=config.timeout,
    )
    # Tokenizer for the served model — presumably used by the base class /
    # other methods for prompt handling; confirm against APIServer usage.
    self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
    # Forward the reasoning configuration so the base class can inject it
    # into chat/completion calls.
    super().__init__(config, reasoning_config=reasoning_config)
async def check_server_status_task(self, chat_completion: bool = True):
while True: