mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-25 17:10:42 +00:00
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
This commit is contained in:
parent
d07ab3e3ce
commit
5cfd1929f1
19 changed files with 708 additions and 452 deletions
|
|
@ -17,7 +17,6 @@ import requests
|
|||
|
||||
from .config import TrainingConfig
|
||||
|
||||
|
||||
# Global variable to keep track of the vLLM process
|
||||
_vllm_process: Optional[subprocess.Popen] = None
|
||||
|
||||
|
|
@ -25,37 +24,34 @@ _vllm_process: Optional[subprocess.Popen] = None
|
|||
def is_port_in_use(port: int) -> bool:
|
||||
"""Check if a port is already in use."""
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
return s.connect_ex(('localhost', port)) == 0
|
||||
return s.connect_ex(("localhost", port)) == 0
|
||||
|
||||
|
||||
def kill_process_on_port(port: int, timeout: float = 5.0) -> bool:
|
||||
"""
|
||||
Kill any process using the specified port.
|
||||
|
||||
|
||||
Returns True if no process was running or if it was successfully killed.
|
||||
"""
|
||||
if not is_port_in_use(port):
|
||||
return True
|
||||
|
||||
|
||||
print(f" Port {port} is in use, attempting to kill existing process...")
|
||||
|
||||
|
||||
try:
|
||||
# Try to find and kill the process using lsof (Linux/Mac)
|
||||
result = subprocess.run(
|
||||
["lsof", "-t", "-i", f":{port}"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5
|
||||
["lsof", "-t", "-i", f":{port}"], capture_output=True, text=True, timeout=5
|
||||
)
|
||||
if result.stdout.strip():
|
||||
pids = result.stdout.strip().split('\n')
|
||||
pids = result.stdout.strip().split("\n")
|
||||
for pid in pids:
|
||||
try:
|
||||
os.kill(int(pid), signal.SIGTERM)
|
||||
print(f" Sent SIGTERM to PID {pid}")
|
||||
except (ProcessLookupError, ValueError):
|
||||
pass
|
||||
|
||||
|
||||
# Wait for port to be free
|
||||
start = time.time()
|
||||
while time.time() - start < timeout:
|
||||
|
|
@ -63,7 +59,7 @@ def kill_process_on_port(port: int, timeout: float = 5.0) -> bool:
|
|||
print(f" Port {port} is now free")
|
||||
return True
|
||||
time.sleep(0.5)
|
||||
|
||||
|
||||
# Force kill if still running
|
||||
for pid in pids:
|
||||
try:
|
||||
|
|
@ -71,7 +67,7 @@ def kill_process_on_port(port: int, timeout: float = 5.0) -> bool:
|
|||
print(f" Sent SIGKILL to PID {pid}")
|
||||
except (ProcessLookupError, ValueError):
|
||||
pass
|
||||
|
||||
|
||||
time.sleep(1)
|
||||
return not is_port_in_use(port)
|
||||
except FileNotFoundError:
|
||||
|
|
@ -84,7 +80,7 @@ def kill_process_on_port(port: int, timeout: float = 5.0) -> bool:
|
|||
pass
|
||||
except subprocess.TimeoutExpired:
|
||||
pass
|
||||
|
||||
|
||||
print(f" WARNING: Could not kill process on port {port}")
|
||||
return False
|
||||
|
||||
|
|
@ -135,7 +131,9 @@ def launch_vllm_server(
|
|||
if is_port_in_use(config.vllm_port):
|
||||
print(f" WARNING: Port {config.vllm_port} is already in use!")
|
||||
if not kill_process_on_port(config.vllm_port):
|
||||
print(f" ERROR: Could not free port {config.vllm_port}. Please manually kill the process.")
|
||||
print(
|
||||
f" ERROR: Could not free port {config.vllm_port}. Please manually kill the process."
|
||||
)
|
||||
print(f" Try: lsof -i :{config.vllm_port} | grep LISTEN")
|
||||
print(f" Or: pkill -f 'vllm.*{config.vllm_port}'")
|
||||
return None
|
||||
|
|
@ -155,7 +153,7 @@ def launch_vllm_server(
|
|||
"--gpu-memory-utilization",
|
||||
str(config.vllm_gpu_memory_utilization),
|
||||
]
|
||||
|
||||
|
||||
# Add served-model-name if using checkpoint path
|
||||
if model_path != config.model_name:
|
||||
vllm_command.extend(["--served-model-name", config.model_name])
|
||||
|
|
@ -209,7 +207,9 @@ def check_vllm_process_health() -> None:
|
|||
global _vllm_process
|
||||
|
||||
if _vllm_process is not None and _vllm_process.poll() is not None:
|
||||
print(f" WARNING: vLLM terminated unexpectedly (code: {_vllm_process.returncode})")
|
||||
print(
|
||||
f" WARNING: vLLM terminated unexpectedly (code: {_vllm_process.returncode})"
|
||||
)
|
||||
_vllm_process = None
|
||||
|
||||
|
||||
|
|
@ -299,7 +299,9 @@ def hotswap_lora_adapter(
|
|||
print(f" [LORA] ✓ Hot-swapped adapter: {adapter_name} ({adapter_path})")
|
||||
return True
|
||||
else:
|
||||
print(f" [LORA] ✗ Hot-swap failed: {response.status_code} - {response.text}")
|
||||
print(
|
||||
f" [LORA] ✗ Hot-swap failed: {response.status_code} - {response.text}"
|
||||
)
|
||||
return False
|
||||
|
||||
except requests.exceptions.ConnectionError:
|
||||
|
|
@ -308,4 +310,3 @@ def hotswap_lora_adapter(
|
|||
except Exception as e:
|
||||
print(f" [LORA] ✗ Error during hot-swap: {e}")
|
||||
return False
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue