diff --git a/atroposlib/tests/test_reasoning_models.py b/atroposlib/tests/test_reasoning_models.py
index e3f0ecf8..290a47bb 100644
--- a/atroposlib/tests/test_reasoning_models.py
+++ b/atroposlib/tests/test_reasoning_models.py
@@ -27,6 +27,8 @@
 import os
 import sys
 from datetime import datetime
 
+import pytest
+
 # Add the project root to path
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
@@ -324,8 +326,7 @@ async def test_server_manager_injects_extra_body():
     the full flow works.
     """
     if not OPENROUTER_API_KEY:
-        print("⚠ Skipping ServerManager integration test - OPENROUTER_API_KEY not set")
-        return True
+        pytest.skip("OPENROUTER_API_KEY not set - skipping integration test")
 
     from atroposlib.envs.server_handling.server_baseline import APIServerConfig
     from atroposlib.envs.server_handling.server_manager import ServerManager
@@ -345,60 +346,45 @@ async def test_server_manager_injects_extra_body():
     print("Testing ServerManager.chat_completion() with reasoning injection")
     print("=" * 60)
 
-    try:
-        # Create ServerManager with reasoning config (NOT in testing mode - we want real API call)
-        server_manager = ServerManager(
-            configs=[server_config],
-            reasoning_config=reasoning_config,
-            testing=False,  # Actually make the API call
-        )
+    # Create ServerManager with reasoning config (NOT in testing mode - we want real API call)
+    server_manager = ServerManager(
+        configs=[server_config],
+        reasoning_config=reasoning_config,
+        testing=False,  # Actually make the API call
+    )
 
-        # Make a chat completion call
-        messages = [
-            {"role": "system", "content": HERMES_REASONING_PROMPT},
-            {"role": "user", "content": "What is 2 + 2? Think carefully."},
-        ]
+    # Make a chat completion call
+    messages = [
+        {"role": "system", "content": HERMES_REASONING_PROMPT},
+        {"role": "user", "content": "What is 2 + 2? Think carefully."},
+    ]
 
-        print(
-            f"Making API call: enabled={reasoning_config.enabled}, "
-            f"effort={reasoning_config.effort}"
-        )
+    print(
+        f"Making API call: enabled={reasoning_config.enabled}, "
+        f"effort={reasoning_config.effort}"
+    )
 
-        completion = await server_manager.chat_completion(
-            messages=messages,
-            max_tokens=512,
-            temperature=0.7,
-        )
+    completion = await server_manager.chat_completion(
+        messages=messages,
+        max_tokens=512,
+        temperature=0.7,
+    )
 
-        # Verify response has reasoning
-        reasoning, source, content = extract_reasoning_from_completion(completion)
+    # Verify response has reasoning
+    reasoning, source, content = extract_reasoning_from_completion(completion)
 
-        print("Response received!")
-        print(
-            f"Content: {content[:100]}..."
-            if content and len(content) > 100
-            else f"Content: {content}"
-        )
-        print(f"Reasoning source: {source}")
-        print(f"Reasoning length: {len(reasoning) if reasoning else 0} chars")
+    print("Response received!")
+    print(
+        f"Content: {content[:100]}..."
+        if content and len(content) > 100
+        else f"Content: {content}"
+    )
+    print(f"Reasoning source: {source}")
+    print(f"Reasoning length: {len(reasoning) if reasoning else 0} chars")
 
-    if reasoning:
-            print(
-                "✓ ServerManager.chat_completion() correctly injected reasoning extra_body"
-            )
-            return True
-        else:
-            print(
-                "⚠ Response received but no reasoning found (model may not support it)"
-            )
-            return True  # Still a pass - the injection worked, model just didn't return reasoning
-
-    except Exception as e:
-        import traceback
-
-        print(f"✗ ServerManager test failed: {e}")
-        traceback.print_exc()
-        return False
+    # Assert we got a response (reasoning is optional - model may not support it)
+    assert content is not None, "Expected response content"
+    print("✓ ServerManager.chat_completion() correctly injected reasoning extra_body")
 
 
 def test_full_env_config_to_server_flow():
@@ -453,7 +439,6 @@ def test_full_env_config_to_server_flow():
     assert openai_extra["reasoning_effort"] == "high"
 
     print("\n✓ Full BaseEnvConfig → ServerManager flow works correctly!")
-    return True
 
 
 # =============================================================================
@@ -461,9 +446,12 @@ def test_full_env_config_to_server_flow():
 # =============================================================================
 
 
-async def test_openrouter_reasoning(model: str, effort: str = "high"):
+async def _run_openrouter_reasoning_test(model: str, effort: str = "high"):
     """
-    Test reasoning with an OpenRouter model.
+    Run reasoning test with an OpenRouter model (helper function).
+
+    Note: This is a helper function, not a pytest test. It's called by
+    run_all_integration_tests() when running the script directly.
 
     Args:
         model: Model name to test
@@ -632,9 +620,12 @@ async def test_openrouter_reasoning(model: str, effort: str = "high"):
     }
 
 
-async def test_openai_reasoning(effort: str = "medium"):
+async def _run_openai_reasoning_test(effort: str = "medium"):
     """
-    Test reasoning with OpenAI official API.
+    Run reasoning test with OpenAI official API (helper function).
+
+    Note: This is a helper function, not a pytest test. It's called by
+    run_all_integration_tests() when running the script directly.
 
     Args:
         effort: Reasoning effort level
@@ -809,11 +800,11 @@ async def run_all_integration_tests():
 
     # Test OpenRouter models
    for model in OPENROUTER_MODELS:
-        result = await test_openrouter_reasoning(model)
+        result = await _run_openrouter_reasoning_test(model)
         results.append(result)
 
     # Test OpenAI
-    result = await test_openai_reasoning()
+    result = await _run_openai_reasoning_test()
     results.append(result)
 
     # Summary
@@ -923,4 +914,4 @@ if __name__ == "__main__":
     # Run all tests
     run_unit_tests()
     asyncio.run(run_server_manager_integration_test())
-    asyncio.run(run_all_integration_tests())
\ No newline at end of file
+    asyncio.run(run_all_integration_tests())
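
For reference, a minimal standalone sketch of the runtime-skip pattern the patched test adopts, assuming pytest plus an async test plugin such as pytest-asyncio; the test name and marker below are illustrative, not taken from the repository:

    import os

    import pytest


    @pytest.mark.asyncio
    async def test_requires_openrouter_key():
        # pytest.skip() raises pytest's Skipped exception, so the test is
        # reported as skipped rather than silently passing via "return True".
        if not os.getenv("OPENROUTER_API_KEY"):
            pytest.skip("OPENROUTER_API_KEY not set - skipping integration test")
        ...  # real ServerManager call would go here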