diff --git a/example_trainer/README.md b/example_trainer/README.md
index 7531538c..a1f69c2a 100644
--- a/example_trainer/README.md
+++ b/example_trainer/README.md
@@ -136,11 +136,14 @@ Zero model duplication - trainer and vLLM share the exact same GPU memory!
 run-api --port 8000
 
 # Terminal 2: Start vLLM with shared weights enabled
+# IMPORTANT: --enforce-eager is REQUIRED here; it disables CUDA graphs.
+# Without it, weight updates won't be visible to inference!
 VLLM_ENABLE_SHARED_WEIGHTS=1 LOGDIR=$LOGDIR \
 CUDA_VISIBLE_DEVICES=0 python example_trainer/vllm_api_server.py \
     --model $MODEL \
     --port 9001 \
-    --gpu-memory-utilization 0.45
+    --gpu-memory-utilization 0.45 \
+    --enforce-eager
 
 # Terminal 3: Start the environment server
 python -u environments/gsm8k_server.py serve \
@@ -429,6 +432,20 @@ python example_trainer/vllm_api_server.py \
 VLLM_ENABLE_SHARED_WEIGHTS=1 LOGDIR=/tmp/atropos python example_trainer/vllm_api_server.py ...
 ```
 
+### "LogProb Alignment: MISMATCH!" in shared_vllm mode
+If you see `[MISMATCH!]` in the logprob alignment output, inference and training are seeing different weights. This is usually caused by **CUDA graphs**.
+
+**Symptom:** `inference_mean` stays constant while `training_mean` changes. The `diff` increases over time.
+
+**Fix:** Add `--enforce-eager` when starting vLLM:
+```bash
+VLLM_ENABLE_SHARED_WEIGHTS=1 LOGDIR=$LOGDIR \
+python example_trainer/vllm_api_server.py \
+    --model $MODEL --port 9001 --enforce-eager  # <-- REQUIRED!
+```
+
+**Why:** CUDA graphs "bake" model weights into the captured graphs at startup, so later updates to the underlying tensors are NOT reflected in inference. Using `--enforce-eager` disables CUDA graphs, so vLLM reads from the shared tensors on every forward pass.
+
 ### "Triton compilation error" on B200/Blackwell GPUs
 The patched vLLM server (`vllm_api_server.py`) automatically applies B200 fixes. If using standard vLLM, add `--enforce-eager`.
 
diff --git a/example_trainer/scripts/run_comparison.sh b/example_trainer/scripts/run_comparison.sh
index 4f05c42e..fa56a5cf 100755
--- a/example_trainer/scripts/run_comparison.sh
+++ b/example_trainer/scripts/run_comparison.sh
@@ -241,12 +241,15 @@ echo "  ✓ run-api started (PID: $SHARED_API_PID, port 8002)"
 wait_for_api 8002 "shared" || { echo "Failed to start shared API"; exit 1; }
 
 # Start vLLM with shared weights (use separate config path)
+# NOTE: --enforce-eager is REQUIRED for single-copy mode!
+# Without it, CUDA graphs freeze the weights, so updates won't be visible to inference.
 echo "  Starting vLLM with shared weights..."
 VLLM_ENABLE_SHARED_WEIGHTS=1 VLLM_BRIDGE_CONFIG_PATH=$LOGDIR/vllm_bridge_config_shared.json \
 CUDA_VISIBLE_DEVICES=2 python example_trainer/vllm_api_server.py \
     --model $MODEL \
     --port 9002 \
     --gpu-memory-utilization 0.35 \
+    --enforce-eager \
     > $LOGDIR/vllm_shared.log 2>&1 &
 SHARED_VLLM_PID=$!
 echo "  ✓ vLLM started (PID: $SHARED_VLLM_PID, port 9002)"
diff --git a/example_trainer/scripts/test_single_copy_mode.sh b/example_trainer/scripts/test_single_copy_mode.sh
index 0a76e1e7..28efc6de 100644
--- a/example_trainer/scripts/test_single_copy_mode.sh
+++ b/example_trainer/scripts/test_single_copy_mode.sh
@@ -53,6 +53,8 @@ cd "$REPO_DIR"
 
 echo ""
 echo "[1/4] Starting vLLM with shared memory enabled..."
+# NOTE: --enforce-eager is REQUIRED for single-copy mode!
+# Without it, CUDA graphs freeze the weights, so updates won't be visible to inference.
 VLLM_ENABLE_SHARED_WEIGHTS=1 \
 LOGDIR="$LOG_DIR" \
 python -u example_trainer/vllm_api_server.py \
@@ -61,6 +63,7 @@ python -u example_trainer/vllm_api_server.py \
     --port $VLLM_PORT \
     --dtype bfloat16 \
     --gpu-memory-utilization 0.5 \
+    --enforce-eager \
     > "${LOG_DIR}/vllm.log" 2>&1 &
 
 echo "Waiting for vLLM (45s)..."