atropos/example_trainer/vllm_patching/__init__.py

"""
vLLM Patching Module - Enables CUDA IPC shared memory for single-copy training.

This module patches vLLM's GPUModelRunner to:
1. Call share_memory_() on model weights after loading
2. Export CUDA IPC handles to vllm_bridge_config.json
3. Enable the trainer to attach to vLLM's tensors directly

The result: ONE copy of model weights in GPU memory, shared between
vLLM (inference) and the trainer (gradient updates).

Usage:
    # Set environment BEFORE importing
    import os
    os.environ["VLLM_ENABLE_SHARED_WEIGHTS"] = "1"

    # Import and apply patches BEFORE importing vllm
    from example_trainer.vllm_patching import apply_patches
    apply_patches()

    # Then import vllm normally
    from vllm import AsyncLLM
"""

from .patched_gpu_runner import (
    PatchedGPUModelRunner,
    apply_patches,
    get_patched_runner,
    is_patched,
)

__all__ = [
    "PatchedGPUModelRunner",
    "apply_patches",
    "get_patched_runner",
    "is_patched",
]