mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-28 17:29:30 +00:00
error handling
This commit is contained in:
parent
9e53076a82
commit
80f67f979a
4 changed files with 38 additions and 1 deletion
|
|
@@ -336,6 +336,14 @@ class VLLMWeightBridge:
|
|||
group_name="weight_update_group",
|
||||
)
|
||||
print("[Bridge] ✓ NCCL group created")
|
||||
|
||||
# Barrier synchronization to ensure both sides are ready
|
||||
print("[Bridge] Waiting for all ranks to be ready...")
|
||||
try:
|
||||
dist.barrier(group=self.gloo_group)
|
||||
print("[Bridge] ✓ All ranks synchronized and ready")
|
||||
except Exception as e:
|
||||
print(f"[Bridge] Warning: Barrier sync failed: {e}")
|
||||
|
||||
def _initialize_http_mode(self) -> None:
|
||||
"""Initialize HTTP-based weight synchronization (fallback)."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue