# atropos/example_trainer/vllm_patching/__init__.py

"""
vLLM Patching Module - Enables shared memory weight updates.

This module patches vLLM's GPUModelRunner to:
1. Call share_memory_() on model weights after loading
2. Spawn a daemon process that receives NCCL weight updates from trainers
3. Enable real-time weight synchronization without restarting vLLM

Usage:
    # Import this BEFORE importing vllm
    from example_trainer.vllm_patching import apply_patches
    apply_patches()

    # Then import vllm normally
    from vllm import AsyncLLM
"""

from .patched_gpu_runner import (
    PatchedGPUModelRunner,
    apply_patches,
    get_patched_runner,
    is_patched,
)
from .weight_updater import weight_updater_process
from .distributed_utils import (
    init_process_group,
    broadcast_object_list,
    get_inference_urls,
    get_json_data,
)

# Names re-exported as this package's public API, grouped by the submodule
# each one is imported from (same order as the import statements above).
__all__ = [
    # .patched_gpu_runner
    "PatchedGPUModelRunner", "apply_patches",
    "get_patched_runner", "is_patched",
    # .weight_updater
    "weight_updater_process",
    # .distributed_utils
    "init_process_group", "broadcast_object_list",
    "get_inference_urls", "get_json_data",
]