mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
Add reasoning configuration support across server implementations
- Updated server classes (OpenAIServer, SGLangServer, TrlVllmServer, VLLMServer) to accept a ReasoningConfig parameter during initialization.
- Enhanced ReasoningConfig to allow flexible max_tokens without strict validation, accommodating varying provider limits.
- Implemented reasoning-configuration injection in the APIServer methods for chat and completion handling.
- Updated tests to reflect the changed max_tokens validation logic.
This commit integrates reasoning capabilities into the server-handling architecture, improving compatibility with diverse reasoning models.
This commit is contained in:
parent
6763649c3a
commit
e1ece3e64e
7 changed files with 190 additions and 116 deletions
|
|
@ -200,22 +200,25 @@ def test_reasoning_config_invalid_effort():
|
|||
print("✓ Invalid effort raises ValueError")
|
||||
|
||||
|
||||
def test_reasoning_config_invalid_max_tokens():
    """Test that invalid max_tokens raises ValueError.

    Exercises both sides of the accepted 1024-32000 range: a value below
    the minimum and a value above the maximum must each raise ValueError
    whose message names the permitted range.
    """
    # Drive both out-of-range probes through one loop so the two cases
    # cannot drift apart; each carries its own failure message.
    cases = [
        (500, "Should have raised ValueError for too low"),     # below 1024 minimum
        (50000, "Should have raised ValueError for too high"),  # above 32000 maximum
    ]
    for tokens, failure_msg in cases:
        try:
            ReasoningConfig(max_tokens=tokens)  # Should raise
            assert False, failure_msg
        except ValueError as e:
            assert "must be between 1024 and 32000" in str(e)
    print("✓ Invalid max_tokens raises ValueError")
|
||||
def test_reasoning_config_max_tokens_no_validation():
    """Test that max_tokens accepts any value (no range validation).

    Provider limits vary and may change over time:
    - OpenRouter currently caps Anthropic at 1024-32000
    - Native Anthropic API supports up to 128k extended thinking
    We don't enforce limits here to allow flexibility.
    """
    # A sub-1024 value and a very large value (native Anthropic 128k
    # thinking) must both be stored verbatim, and passing max_tokens
    # should auto-enable reasoning.
    for requested in (500, 128000):
        cfg = ReasoningConfig(max_tokens=requested)
        assert cfg.max_tokens == requested
        assert cfg.enabled  # Auto-enabled

    print("✓ max_tokens accepts any value (no range validation)")
|
||||
|
||||
|
||||
def test_hermes_prompts_defined():
|
||||
|
|
@ -855,7 +858,7 @@ def run_unit_tests():
|
|||
test_reasoning_config_full()
|
||||
test_reasoning_config_effort_mapping()
|
||||
test_reasoning_config_invalid_effort()
|
||||
test_reasoning_config_invalid_max_tokens()
|
||||
test_reasoning_config_max_tokens_no_validation()
|
||||
test_hermes_prompts_defined()
|
||||
|
||||
# ServerManager integration tests (no API calls)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue