diff --git a/atroposlib/envs/server_handling/README.md b/atroposlib/envs/server_handling/README.md
new file mode 100644
index 00000000..12ed0a96
--- /dev/null
+++ b/atroposlib/envs/server_handling/README.md
@@ -0,0 +1,70 @@
+# Server Handling
+
+This module provides server abstraction layers for different LLM inference backends.
+
+## Reasoning Model Support
+
+The `ReasoningConfig` class enables support for reasoning/thinking models across different providers.
+
+### Provider Differences
+
+| Feature | OpenAI | OpenRouter / Others |
+|---------|--------|---------------------|
+| Format | `{"reasoning_effort": "high"}` | `{"reasoning": {"enabled": true, "effort": "high"}}` |
+| Effort Levels | `none`, `minimal`, `low`, `medium`, `high`, `xhigh` | `none`, `minimal`, `low`, `medium`, `high`, `xhigh` |
+| Max Tokens | Not supported | `{"reasoning": {"max_tokens": 16000}}` |
+| Temperature | Must be `1.0` | No restriction |
+| Token Param | `max_completion_tokens` | `max_tokens` |
+
+### Effort Level to Token Mapping
+
+When providers don't support effort strings, effort levels map to approximate token budgets (based on 32k base):
+
+| Effort | Tokens | Percentage |
+|--------|--------|------------|
+| none | 1,024 | Minimum |
+| minimal | 3,200 | ~10% |
+| low | 6,400 | ~20% |
+| medium | 16,000 | ~50% |
+| high | 25,600 | ~80% |
+| xhigh | 30,400 | ~95% |
+
+### Provider Token Limits
+
+- **OpenRouter**: Caps Anthropic reasoning at 1,024-32,000 tokens ([docs](https://openrouter.ai/docs/guides/best-practices/reasoning-tokens))
+- **Native Anthropic**: Supports up to 128k extended thinking tokens
+
+### Usage
+
+Reasoning is only injected for **chat completions** (not completions or logprobs API).
+
+```python
+# Via environment config
+config = BaseEnvConfig(
+    thinking_mode=True,
+    reasoning_effort="high",
+    max_reasoning_tokens=16000,
+)
+
+# Direct ReasoningConfig
+reasoning_config = ReasoningConfig(
+    enabled=True,
+    effort="high",
+    max_tokens=16000,
+)
+```
+
+### Bypassing Reasoning Injection
+
+Pass `skip_reasoning=True` to any chat completion call:
+
+```python
+await server.chat_completion(messages=messages, skip_reasoning=True)
+```
+
+### Important Constraints
+
+1. **OpenRouter**: Only accepts ONE of `effort` or `max_tokens`, not both. When both specified, effort takes priority.
+2. **OpenAI**: All effort levels are passed through directly.
+3. **Auto-enable**: Setting `effort` or `max_tokens` automatically enables reasoning mode.
+
diff --git a/atroposlib/envs/server_handling/server_baseline.py b/atroposlib/envs/server_handling/server_baseline.py
index 7f014da5..276968ff 100644
--- a/atroposlib/envs/server_handling/server_baseline.py
+++ b/atroposlib/envs/server_handling/server_baseline.py
@@ -40,7 +40,6 @@ class ReasoningConfig:
 
     def __post_init__(self):
         """Validate and auto-enable if effort or max_tokens are set."""
-        # Validate effort if provided
         if self.effort is not None and self.effort not in VALID_REASONING_EFFORTS:
             raise ValueError(
                 f"Invalid reasoning_effort: {self.effort}. "
@@ -68,12 +67,12 @@ class ReasoningConfig:
     # Calculated as percentage of 32k base: none=min, minimal=10%, low=20%,
     # medium=50%, high=80%, xhigh=95%
     EFFORT_TO_MAX_TOKENS = {
-        "none": 1024,  # Minimum/disabled
-        "minimal": 3200,  # ~10% of 32k
-        "low": 6400,  # ~20% of 32k
-        "medium": 16000,  # ~50% of 32k
-        "high": 25600,  # ~80% of 32k
-        "xhigh": 30400,  # ~95% of 32k
+        "none": 1024,
+        "minimal": 3200,
+        "low": 6400,
+        "medium": 16000,
+        "high": 25600,
+        "xhigh": 30400,
     }
 
     def build_extra_body(
@@ -102,33 +101,20 @@ class ReasoningConfig:
         is_openai_official = base_url and "api.openai.com" in base_url
 
         if is_openai_official:
-            # OpenAI only accepts reasoning_effort at top level, not nested reasoning object
-            # They also don't support max_tokens for reasoning
+            # OpenAI uses top-level reasoning_effort
             effort = self.effort if self.effort else "medium"
-            # Map our extended effort levels to OpenAI's supported values
-            openai_effort_map = {
-                "none": "low",  # OpenAI doesn't have "none", use low
-                "minimal": "low",  # OpenAI doesn't have "minimal", use low
-                "low": "low",
-                "medium": "medium",
-                "high": "high",
-                "xhigh": "high",  # OpenAI doesn't have "xhigh", use high
-            }
-            return {"reasoning_effort": openai_effort_map.get(effort, "medium")}
+            return {"reasoning_effort": effort}
         else:
             # Standard format for OpenRouter, Nebius, Nous Portal, etc.
             reasoning = {"enabled": True}
 
-            # If use_max_tokens is True, convert effort to max_tokens
             if use_max_tokens and self.effort is not None:
                 reasoning["max_tokens"] = self.EFFORT_TO_MAX_TOKENS.get(
                     self.effort, 8192
                 )
             elif self.effort is not None:
-                # Pass effort string directly (provider may or may not support it)
                 reasoning["effort"] = self.effort
             elif self.max_tokens is not None:
-                # Use explicit max_tokens if provided
                 reasoning["max_tokens"] = self.max_tokens
 
             return {"reasoning": reasoning}
@@ -152,7 +138,6 @@ class ReasoningConfig:
         reasoning_effort = getattr(env_config, "reasoning_effort", None)
         max_reasoning_tokens = getattr(env_config, "max_reasoning_tokens", None)
 
-        # Determine if enabled: explicitly True, or implied by effort/max_tokens
         enabled = (
             thinking_mode
             or reasoning_effort is not None
@@ -325,7 +310,6 @@ class APIServer(ABC):
         if skip_reasoning:
             return kwargs
 
-        # Check if reasoning is configured and active
         if self.reasoning_config is None or not self.reasoning_config.is_reasoning_kwargs_active():
             return kwargs
 
@@ -333,17 +317,14 @@ class APIServer(ABC):
         base_url = getattr(self.config, "base_url", None)
         is_openai_official = base_url and "api.openai.com" in base_url
 
-        # Build the extra_body for reasoning
         reasoning_extra_body = self.reasoning_config.build_extra_body(base_url)
-
         if reasoning_extra_body:
             # Merge with any existing extra_body in kwargs
             existing_extra_body = kwargs.get("extra_body", {}) or {}
             kwargs["extra_body"] = {**existing_extra_body, **reasoning_extra_body}
 
-        # OpenAI reasoning models have specific requirements
+        # OpenAI requires temperature=1.0 and max_completion_tokens (not max_tokens)
         if is_openai_official:
-            # OpenAI reasoning models require temperature=1.0 (or unset)
             kwargs["temperature"] = 1.0
 
             # OpenAI reasoning models use max_completion_tokens instead of max_tokens
@@ -479,9 +460,7 @@ class APIServer(ABC):
         """
         if not self.initialized:
             if self.config.health_check:
-                if (
-                    self.config.base_url is not None
-                ):  # skip health check if using OpenAI API
+                if self.config.base_url is not None:
                     self.check_task = asyncio.create_task(
                         self.check_server_status_task()
                     )
@@ -493,7 +472,6 @@ class APIServer(ABC):
         kwargs["model"] = self.config.model_name
         split = kwargs.pop("split", "train")
 
-        # Inject reasoning config if enabled (can be skipped via skip_reasoning=True)
         kwargs = self._inject_reasoning_kwargs(kwargs)
 
         stat_dict = {}