Use managed server

This commit is contained in:
balyan.sid@gmail.com 2026-01-14 17:09:01 +05:30
parent 32320512e8
commit 6a27e88023
2 changed files with 128 additions and 56 deletions

View file

@ -169,6 +169,8 @@ class VLLMServer(APIServer):
prompt_tokens = prompt_tokens[1:]
if "max_new_tokens" in kwargs:
kwargs["max_tokens"] = kwargs.pop("max_new_tokens")
if "max_completion_tokens" in kwargs:
kwargs["max_tokens"] = kwargs.pop("max_completion_tokens")
if "model" in kwargs:
kwargs.pop("model")
# Prepare request for VLLM native API