Add reasoning configuration support across server implementations

- Updated server classes (OpenAIServer, SGLangServer, TrlVllmServer, VLLMServer) to accept a ReasoningConfig parameter during initialization.
- Enhanced ReasoningConfig to allow flexible max_tokens without strict validation, accommodating varying provider limits.
- Implemented reasoning configuration injection in APIServer methods for chat and completion handling.
- Updated tests to reflect changes in max_tokens validation logic.

This commit integrates reasoning capabilities into the server handling architecture, improving compatibility with diverse reasoning models.
This commit is contained in:
teknium 2026-01-05 23:20:01 +00:00
parent 6763649c3a
commit e1ece3e64e
7 changed files with 190 additions and 116 deletions

View file

@@ -200,22 +200,25 @@ def test_reasoning_config_invalid_effort():
print("✓ Invalid effort raises ValueError")
def test_reasoning_config_invalid_max_tokens():
    """Test that invalid max_tokens raises ValueError."""
    # Check both ends of the valid range: below the minimum and above the maximum.
    out_of_range = [(500, "too low"), (50000, "too high")]
    for tokens, label in out_of_range:
        try:
            ReasoningConfig(max_tokens=tokens)  # Should raise
            assert False, f"Should have raised ValueError for {label}"
        except ValueError as e:
            # The error message documents the accepted range.
            assert "must be between 1024 and 32000" in str(e)
    print("✓ Invalid max_tokens raises ValueError")
def test_reasoning_config_max_tokens_no_validation():
    """Test that max_tokens accepts any value (no range validation).

    Provider limits vary and may change over time:
    - OpenRouter currently caps Anthropic at 1024-32000
    - Native Anthropic API supports up to 128k extended thinking

    We don't enforce limits here to allow flexibility.
    """
    # Values below and above the old 1024-32000 range must both be accepted,
    # and setting max_tokens implicitly enables reasoning.
    for tokens in (500, 128000):
        cfg = ReasoningConfig(max_tokens=tokens)
        assert cfg.max_tokens == tokens
        assert cfg.enabled  # Auto-enabled
    print("✓ max_tokens accepts any value (no range validation)")
def test_hermes_prompts_defined():
@@ -855,7 +858,7 @@ def run_unit_tests():
test_reasoning_config_full()
test_reasoning_config_effort_mapping()
test_reasoning_config_invalid_effort()
test_reasoning_config_invalid_max_tokens()
test_reasoning_config_max_tokens_no_validation()
test_hermes_prompts_defined()
# ServerManager integration tests (no API calls)