fixing comments

Jai Suphavadeeprasit 2026-03-03 23:08:28 -05:00
parent 51088ac24d
commit 1eeb31065f
6 changed files with 21 additions and 81 deletions

@@ -284,14 +284,18 @@ class VLLMServer(APIServer):
        top_k = max(1, top_k)
        # Use input_ids if provided (from ManagedServer), otherwise tokenize prompt
        from_prompt_text = False
        if "input_ids" in kwargs:
            prompt_tokens = kwargs.pop("input_ids")
            kwargs.pop("prompt", None)
        else:
            prompt_tokens = self.tokenizer.encode(kwargs.pop("prompt"))
            from_prompt_text = True
        # Check for double BOS token.
        # Only normalize BOS for tokenizer-encoded prompt text.
        if (
            from_prompt_text
            and len(prompt_tokens) >= 2
            and prompt_tokens[0] == self.tokenizer.bos_token_id == prompt_tokens[1]
        ):
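
The guard above matters because tokenizers that auto-prepend BOS can produce a doubled BOS when the prompt text already starts with one, which skews the logprob of the first real token. A minimal standalone sketch of the same normalization, with the helper name and plain list-of-ints interface assumed for illustration:

def normalize_bos(prompt_tokens, bos_token_id):
    # Hypothetical helper mirroring the check in the diff: drop one BOS
    # token when the tokenizer prepended a duplicate to text that already
    # began with BOS. Token IDs are plain ints; bos_token_id may be None.
    if (
        bos_token_id is not None
        and len(prompt_tokens) >= 2
        and prompt_tokens[0] == bos_token_id == prompt_tokens[1]
    ):
        return prompt_tokens[1:]
    return prompt_tokens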
@@ -306,6 +310,11 @@ class VLLMServer(APIServer):
        request_data = {"prompt": {"prompt_token_ids": prompt_tokens}}
        request_data["prompt_logprobs"] = top_k
        request_data.update(kwargs)
        # This API is prompt-logprobs focused, not generation-focused.
        request_data["n"] = 1
        request_data["temperature"] = 0.0
        request_data["top_p"] = 1.0
        request_data.setdefault("max_tokens", 1)
        # Keep semaphore behavior consistent with other server calls.
        split = request_data.pop("split", "train")
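
Pinning n, temperature, top_p, and max_tokens turns the call into a pure scoring request: the single forced generation token is throwaway, and the useful output is the per-position prompt logprobs. A sketch of the resulting payload under those settings, with the token IDs and top_k value made up for illustration:

# Example token IDs and top_k are assumptions, not values from the diff.
prompt_tokens = [1, 15043, 3186]
top_k = 5
request_data = {
    "prompt": {"prompt_token_ids": prompt_tokens},
    "prompt_logprobs": top_k,  # top-k alternatives scored at each prompt position
    "n": 1,                    # a single completion
    "temperature": 0.0,        # deterministic decoding
    "top_p": 1.0,              # no nucleus truncation
    "max_tokens": 1,           # generate as little as possible; only prompt scores matter
}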