Add reasoning configuration support across server implementations

- Updated server classes (OpenAIServer, SGLangServer, TrlVllmServer, VLLMServer) to accept a ReasoningConfig parameter during initialization.
- Enhanced ReasoningConfig to allow flexible max_tokens without strict validation, accommodating varying provider limits.
- Implemented reasoning configuration injection in APIServer methods for chat and completion handling.
- Updated tests to reflect changes in max_tokens validation logic.

This commit integrates reasoning capabilities into the server handling architecture, improving compatibility with diverse reasoning models.
This commit is contained in:
teknium 2026-01-05 23:20:01 +00:00
parent 6763649c3a
commit e1ece3e64e
7 changed files with 190 additions and 116 deletions

View file

@@ -12,7 +12,11 @@ from pydantic_cli import FailedExecutionException
from transformers import AutoTokenizer
from atroposlib.envs.constants import NAMESPACE_SEP, OPENAI_NAMESPACE
from atroposlib.envs.server_handling.server_baseline import APIServer, APIServerConfig
from atroposlib.envs.server_handling.server_baseline import (
APIServer,
APIServerConfig,
ReasoningConfig,
)
class VLLMServer(APIServer):
@@ -20,14 +24,18 @@ class VLLMServer(APIServer):
VLLM server handling.
"""
def __init__(
    self,
    config: APIServerConfig,
    reasoning_config: ReasoningConfig = None,
):
    """Initialize the VLLM server handler.

    Args:
        config: API server settings; ``api_key``, ``base_url`` and
            ``timeout`` configure the client, ``model_name`` selects the
            tokenizer.
        reasoning_config: Optional reasoning configuration forwarded to the
            ``APIServer`` base class. Defaults to ``None`` (no reasoning
            configuration). NOTE(review): annotation is implicitly Optional
            (``ReasoningConfig = None``); consider ``Optional[ReasoningConfig]``
            file-wide.
    """
    # Async OpenAI-compatible client pointed at the VLLM endpoint.
    self.openai = openai.AsyncClient(
        api_key=config.api_key,
        base_url=config.base_url,
        timeout=config.timeout,
    )
    # Tokenizer for the served model — presumably used by the base class /
    # other methods for prompt handling; confirm against APIServer usage.
    self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
    # Forward the reasoning configuration so the base class can inject it
    # into chat/completion calls.
    super().__init__(config, reasoning_config=reasoning_config)
async def check_server_status_task(self, chat_completion: bool = True):
while True: