cleanup 2

Jai Suphavadeeprasit 2026-02-13 11:56:55 -05:00
parent 0ebf3552c9
commit fe5b13a5da


@@ -798,7 +798,7 @@ def train_lora_restart(config: TrainingConfig):
current_adapter_path = save_lora_checkpoint(model, config.save_path, step + 1)
# Restart vLLM with new adapter
print(f" [RESTART] Restarting vLLM with new adapter...")
print(" [RESTART] Restarting vLLM with new adapter...")
_terminate_vllm(vllm_proc, config.vllm_port)
vllm_proc = _launch_vllm_with_lora(config, current_adapter_path)
if vllm_proc is None:
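The changed pairs in this commit are one and the same cleanup: the f prefix is dropped from print calls whose strings contain no {} placeholders, for example:

    # Before: f-string with nothing to interpolate
    print(f" [RESTART] Restarting vLLM with new adapter...")
    # After: plain string literal
    print(" [RESTART] Restarting vLLM with new adapter...")

f-strings that do interpolate values (such as the "Final GPU memory" line in the last hunk) are left untouched.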
@@ -899,7 +899,7 @@ def _launch_vllm_with_lora(config: TrainingConfig, adapter_path: str) -> Optiona
env["CUDA_VISIBLE_DEVICES"] = str(config.vllm_gpu)
print(f" GPU: {config.vllm_gpu} (via CUDA_VISIBLE_DEVICES)")
else:
print(f" GPU: Same as trainer (inherited CUDA_VISIBLE_DEVICES)")
print(" GPU: Same as trainer (inherited CUDA_VISIBLE_DEVICES)")
print(f" Launching: {' '.join(cmd)}")
print(f" Adapter: {adapter_path}")
@@ -917,7 +917,7 @@ def _launch_vllm_with_lora(config: TrainingConfig, adapter_path: str) -> Optiona
start_new_session=True # Creates new process group for easy cleanup
)
print(f" vLLM PID: {proc.pid} (process group: {os.getpgid(proc.pid)})")
print(f" NOTE: vLLM without --enforce-eager compiles CUDA graphs on startup (takes 1-3 min)...")
print(" NOTE: vLLM without --enforce-eager compiles CUDA graphs on startup (takes 1-3 min)...")
# Wait for server to be ready (longer timeout for CUDA graph compilation)
if not wait_for_vllm_ready(config.vllm_port, timeout=300):
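The readiness check above calls wait_for_vllm_ready, which sits outside this diff. A minimal sketch of what such a poller might look like, assuming the server exposes an HTTP endpoint that returns 200 once it is up (the /health path is an assumption, not shown in the diff):

    import time
    import requests

    def wait_for_vllm_ready(port: int, timeout: int = 300) -> bool:
        """Poll the server until it answers, or give up after `timeout` seconds."""
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                # "/health" is assumed; the diff only shows the port being checked.
                if requests.get(f"http://localhost:{port}/health", timeout=5).status_code == 200:
                    return True
            except requests.RequestException:
                pass  # server not accepting connections yet (e.g. still compiling CUDA graphs)
            time.sleep(2)
        return False

The 300-second timeout mirrors the call site above, which allows for CUDA graph compilation on startup.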
@@ -936,7 +936,7 @@ def _launch_vllm_with_lora(config: TrainingConfig, adapter_path: str) -> Optiona
return None
# Load the LoRA adapter
print(f" Loading LoRA adapter...")
print(" Loading LoRA adapter...")
try:
resp = requests.post(
f"http://localhost:{config.vllm_port}/lora/load",
@@ -944,7 +944,7 @@ def _launch_vllm_with_lora(config: TrainingConfig, adapter_path: str) -> Optiona
timeout=60,
)
if resp.status_code == 200:
print(f" ✓ Adapter loaded successfully")
print(" ✓ Adapter loaded successfully")
else:
print(f" WARNING: Adapter load returned {resp.status_code}: {resp.text}")
except Exception as e:
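The JSON body of the /lora/load request is not visible in the two hunks above. A hedged sketch of the whole call, with the payload field names (lora_name, lora_path) being assumptions:

    import requests

    def load_lora_adapter(port: int, adapter_path: str) -> bool:
        """POST the new adapter path to the running vLLM server; returns True on HTTP 200."""
        try:
            resp = requests.post(
                f"http://localhost:{port}/lora/load",
                # Hypothetical payload; the actual field names fall outside the hunk context.
                json={"lora_name": "current", "lora_path": adapter_path},
                timeout=60,
            )
            return resp.status_code == 200
        except requests.RequestException as e:
            print(f"  WARNING: adapter load failed: {e}")
            return False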
@@ -1050,12 +1050,12 @@ def _terminate_vllm(proc: Optional[subprocess.Popen], port: int = 9001) -> None:
torch.cuda.synchronize()
free_mem = torch.cuda.mem_get_info()[0] / 1e9
total_mem = torch.cuda.mem_get_info()[1] / 1e9
print(f" Final GPU memory: {free_mem:.1f}/{total_mem:.1f} GB free ({100*free_mem/total_mem:.0f}%)")
print(f" Final GPU memory: {free_mem:.1f}/{total_mem:.1f} GB free ({100*free_mem/total_mem:.0f}%)")
if free_mem < total_mem * 0.3:
print(f" WARNING: Low GPU memory! May fail to restart vLLM.")
print(f" Consider reducing --vllm-gpu-memory-utilization")
print(" WARNING: Low GPU memory! May fail to restart vLLM.")
print(" Consider reducing --vllm-gpu-memory-utilization")
print(" vLLM terminated")
print(" vLLM terminated")