mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
qwen tokenizer wrapper & fixed jinja template for tool handling (#224)
* added qwen tokenizer wrapper & fixed jinja template for tool handling issues in the official HF one * moved jinja template into its own file
This commit is contained in:
parent
56fb50a503
commit
9f23c732dd
3 changed files with 117 additions and 0 deletions
0
atroposlib/utils/tokenizers/__init__.py
Normal file
0
atroposlib/utils/tokenizers/__init__.py
Normal file
45
atroposlib/utils/tokenizers/qwen_chat_template.jinja
Normal file
45
atroposlib/utils/tokenizers/qwen_chat_template.jinja
Normal file
|
|
@@ -0,0 +1,45 @@
|
|||
{#- Fixed Qwen chat template with tool-call support.
    Rewritten without list.append(), which the Jinja2 sandbox used by the
    HF tokenizer rejects. All tags use '-' whitespace control, so the
    indentation below is cosmetic and produces no output. -#}
{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\n\n' }}
    {%- endif %}
    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{%- for message in messages %}
    {%- if message.role == 'user' or (message.role == 'assistant' and message.tool_calls is not defined) or (message.role == 'tool' and loop.index > 1) %}
        {%- if message.role == 'user' %}
            {#- The original template split this into two branches
                ('tools and not loop.first' vs. 'not tools or (tools and loop.first)')
                that emitted identical output; the exhaustive, redundant
                conditions are collapsed into one emission. -#}
            {{- '<|im_start|>user\n' + message.content + '<|im_end|>\n' }}
        {%- elif message.role == 'assistant' %}
            {{- '<|im_start|>assistant\n' + message.content + '<|im_end|>\n' }}
        {%- elif message.role == 'tool' %}
            {#- Tool responses are replayed to the model inside a user turn. -#}
            {{- '<|im_start|>user\n<tool_response>\n' + message.content + '\n</tool_response><|im_end|>\n' }}
        {%- endif %}
    {%- elif message.role == 'assistant' and message.tool_calls is defined %}
        {{- '<|im_start|>assistant\n' }}
        {%- if message.content %}
            {{- message.content + '\n\n' }}
        {%- endif %}
        {%- for tool_call in message.tool_calls %}
            {%- if tool_call.function is defined %}
                {{- '<tool_call>\n{\"name\": \"' + tool_call.function.name + '\", \"arguments\": ' + tool_call.function.arguments + '}\n</tool_call>' }}
            {%- endif %}
        {%- endfor %}
        {{- '<|im_end|>\n' }}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
{%- endif %}
|
||||
72
atroposlib/utils/tokenizers/qwen_fixed_tokenizer.py
Normal file
72
atroposlib/utils/tokenizers/qwen_fixed_tokenizer.py
Normal file
|
|
@@ -0,0 +1,72 @@
|
|||
"""
|
||||
Custom Qwen tokenizer wrapper with fixed Jinja2 template.
|
||||
|
||||
This wrapper overrides the chat_template to avoid Jinja2 sandbox restrictions
|
||||
that prevent list.append() operations in the original Qwen tokenizer.
|
||||
|
||||
TLDR; tool calls with Qwen are a PITA
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
|
||||
class QwenFixedTokenizer:
    """Wrapper around a Qwen tokenizer that swaps in a fixed chat template.

    The chat template shipped with the official HF Qwen tokenizer uses
    ``list.append()``, which the Jinja2 sandbox used by ``transformers``
    rejects; this wrapper replaces it with the rewritten template stored
    in ``qwen_chat_template.jinja`` next to this module.
    """

    # Process-wide cache of the template text so the .jinja file is read
    # at most once, not once per tokenizer instance.
    _template_cache: Optional[str] = None

    @classmethod
    def _load_chat_template(cls) -> str:
        """Load (and memoize) the chat template from the .jinja file.

        Returns:
            The template text, read as UTF-8.
        """
        if cls._template_cache is None:
            template_path = Path(__file__).parent / "qwen_chat_template.jinja"
            cls._template_cache = template_path.read_text(encoding="utf-8")
        return cls._template_cache

    def __init__(self, model_name_or_path: str, **kwargs):
        """Initialize the tokenizer wrapper.

        Args:
            model_name_or_path: Model name or path to load tokenizer from
            **kwargs: Additional arguments passed to AutoTokenizer.from_pretrained
        """
        # Load the base tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)

        # Override the chat template with our fixed version
        self.tokenizer.chat_template = self._load_chat_template()

    def __getattr__(self, name: str) -> Any:
        """Delegate all other attribute lookups to the underlying tokenizer.

        Only called when normal lookup fails. Guards against infinite
        recursion when ``self.tokenizer`` itself is not yet set (e.g. during
        unpickling or copy, which bypass ``__init__``), raising a plain
        AttributeError instead of a RecursionError.
        """
        if name == "tokenizer":
            raise AttributeError(name)
        return getattr(self.tokenizer, name)

    def apply_chat_template(
        self,
        conversation: List[Dict[str, str]],
        tools: Optional[List[Dict[str, Any]]] = None,
        tokenize: bool = True,
        padding: bool = False,
        truncation: bool = False,
        max_length: Optional[int] = None,
        return_tensors: Optional[Union[str, bool]] = None,
        return_dict: bool = False,
        add_generation_prompt: bool = False,
        **kwargs,
    ):
        """Apply the fixed chat template.

        This method delegates to the underlying tokenizer's
        ``apply_chat_template`` but ensures our fixed template (installed in
        ``__init__``) is used.
        """
        return self.tokenizer.apply_chat_template(
            conversation,
            tools=tools,
            tokenize=tokenize,
            padding=padding,
            truncation=truncation,
            max_length=max_length,
            return_tensors=return_tensors,
            return_dict=return_dict,
            add_generation_prompt=add_generation_prompt,
            **kwargs,
        )
|
||||
Loading…
Add table
Add a link
Reference in a new issue