atropos/example_trainer/vllm_patching/__init__.py
Jai Suphavadeeprasit 3ac4a64f6f patching problem
2026-02-13 11:26:25 -05:00

44 lines
1 KiB
Python

"""
vLLM Patching Module - Enables shared memory weight updates.

This module patches vLLM's GPUModelRunner to:
    1. Call share_memory_() on model weights after loading
    2. Spawn a daemon process that receives NCCL weight updates from trainers
    3. Enable real-time weight synchronization without restarting vLLM

Usage:
    # Import this BEFORE importing vllm
    from example_trainer.vllm_patching import apply_patches
    apply_patches()

    # Then import vllm normally
    from vllm import AsyncLLM
"""
from .patched_gpu_runner import (
PatchedGPUModelRunner,
apply_patches,
get_patched_runner,
is_patched,
)
from .weight_updater import weight_updater_process
from .distributed_utils import (
init_process_group,
broadcast_object_list,
get_inference_urls,
get_json_data,
)
# Public names exported by `from <package> import *`, grouped by the
# submodule each one is re-exported from.
__all__ = [
    # re-exported from .patched_gpu_runner
    "PatchedGPUModelRunner",
    "apply_patches",
    "get_patched_runner",
    "is_patched",
    # re-exported from .weight_updater
    "weight_updater_process",
    # re-exported from .distributed_utils
    "init_process_group",
    "broadcast_object_list",
    "get_inference_urls",
    "get_json_data",
]