mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-30 17:40:36 +00:00
make preserve thinking optional
This commit is contained in:
parent
12d61d197f
commit
f4875c5dc6
3 changed files with 22 additions and 8 deletions
|
|
@@ -63,6 +63,7 @@ class ManagedServer:
|
|||
tokenizer: Optional[Any] = None,
|
||||
track_tree: bool = False,
|
||||
tool_parser: Optional[str] = None,
|
||||
preserve_think_blocks: bool = False,
|
||||
):
|
||||
"""
|
||||
Initialize the managed server.
|
||||
|
|
@@ -79,12 +80,17 @@ class ManagedServer:
|
|||
chat_completion(). The parser handles extraction of structured
|
||||
tool calls from raw model output. See
|
||||
ToolParserManager.list_registered() for available parsers.
|
||||
preserve_think_blocks: If True, preserves <think> blocks in assistant messages,
|
||||
which are sometimes stripped by chat templates. Defaults to False.
|
||||
Usually not needed, since the chat template should be configured
|
||||
to preserve thinking blocks until a user message arrives.
|
||||
"""
|
||||
self.server = server
|
||||
self.tokenizer = tokenizer
|
||||
self.track_tree = track_tree
|
||||
self._tool_parser_name = tool_parser
|
||||
self._translator = None # Lazy init
|
||||
self._preserve_think_blocks = preserve_think_blocks
|
||||
|
||||
# Initialize storage based on mode
|
||||
if track_tree:
|
||||
|
|
@@ -179,10 +185,11 @@ class ManagedServer:
|
|||
len(messages) == 0 or messages[-1].get("role") != "assistant"
|
||||
)
|
||||
|
||||
# Protect <think> blocks in assistant messages — some chat templates
|
||||
# (e.g. Qwen3) strip them during re-rendering, which breaks prefix
|
||||
# matching for multi-turn sequence extension.
|
||||
messages = self._protect_think_blocks(messages)
|
||||
if not self._preserve_think_blocks:
|
||||
# Protect <think> blocks in assistant messages — some chat templates
|
||||
# (e.g. Qwen3) strip them during re-rendering, which breaks prefix
|
||||
# matching for multi-turn sequence extension.
|
||||
messages = self._protect_think_blocks(messages)
|
||||
|
||||
# Build kwargs
|
||||
template_kwargs = {
|
||||
|
|
|
|||
|
|
@@ -384,6 +384,7 @@ class ServerManager:
|
|||
self,
|
||||
tokenizer=None,
|
||||
base_url: Optional[str] = None,
|
||||
preserve_think_blocks: bool = False,
|
||||
):
|
||||
"""
|
||||
Context manager that provides a ManagedServer instance.
|
||||
|
|
@@ -397,10 +398,13 @@ class ServerManager:
|
|||
|
||||
Args:
|
||||
tokenizer: Optional tokenizer to use. If not provided, will attempt to
|
||||
extract from server or create from model name.
|
||||
extract from server or create from model name.
|
||||
base_url: Pin the session to a specific backend server by its base_url.
|
||||
In production, this comes from the atropos API's server allocation.
|
||||
|
||||
In production, this comes from the atropos API's server allocation.
|
||||
preserve_think_blocks: If True, preserves <think> blocks in assistant messages,
|
||||
which are sometimes stripped by chat templates. Defaults to False.
|
||||
Usually not needed, since the chat template should be configured
|
||||
to preserve thinking blocks until a user message arrives.
|
||||
Yields:
|
||||
ManagedServer, DummyManagedServer, or ProxyManagedServer instance
|
||||
|
||||
|
|
@@ -502,6 +506,7 @@ class ServerManager:
|
|||
server=selected_server,
|
||||
tokenizer=tokenizer,
|
||||
tool_parser=self.tool_parser,
|
||||
preserve_think_blocks=preserve_think_blocks,
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue