Add reasoning configuration support across server implementations

- Updated server classes (OpenAIServer, SGLangServer, TrlVllmServer, VLLMServer) to accept a ReasoningConfig parameter during initialization.
- Enhanced ReasoningConfig to allow flexible max_tokens without strict validation, accommodating varying provider limits.
- Implemented reasoning configuration injection in APIServer methods for chat and completion handling.
- Updated tests to reflect changes in max_tokens validation logic.

This commit integrates reasoning capabilities into the server handling architecture, improving compatibility with diverse reasoning models.
This commit is contained in:
teknium 2026-01-05 23:20:01 +00:00
parent 6763649c3a
commit e1ece3e64e
7 changed files with 190 additions and 116 deletions

View file

@@ -16,7 +16,11 @@ from openai.types.chat.chat_completion import (
from openai.types.completion import Completion, CompletionChoice
from transformers import AutoTokenizer
from atroposlib.envs.server_handling.server_baseline import APIServer, APIServerConfig
from atroposlib.envs.server_handling.server_baseline import (
APIServer,
APIServerConfig,
ReasoningConfig,
)
class TrlVllmServer(APIServer):
@@ -24,10 +28,14 @@ class TrlVllmServer(APIServer):
A server that interfaces with trl's vLLM server.
"""
def __init__(self, config: APIServerConfig):
def __init__(
self,
config: APIServerConfig,
reasoning_config: ReasoningConfig = None,
):
self.config = config
self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
super().__init__(config)
super().__init__(config, reasoning_config=reasoning_config)
async def check_server_status_task(self, chat_completion: bool = True):
"""