patched

2026-04-19 12:57:58 +00:00 · 2026-01-13 16:31:40 -05:00 · 2026-01-13 16:31:40 -05:00 · f4e66705ea
commit f4e66705ea
parent 17e93cbda4
1 changed file with 9 additions and 22 deletions
--- a/example_trainer/vllm_patching/patched_gpu_runner.py
+++ b/example_trainer/vllm_patching/patched_gpu_runner.py
@@ -245,32 +245,19 @@ def _create_patched_runner(BaseRunner: type) -> type:
                if tensor.is_cuda:
                    try:
                        import base64
-                        # Get the storage's IPC handle tuple
                        storage = tensor.untyped_storage()
-                        # _share_cuda_() returns: (handle, storage_size, storage_offset, ...)
                        share_data = storage._share_cuda_()
                        
-                        # Convert handle to bytes - it's a cudaIpcMemHandle_t (64 bytes)
-                        handle = share_data[0]
-                        if isinstance(handle, bytes):
-                            handle_bytes = handle
-                        elif hasattr(handle, '__bytes__'):
-                            handle_bytes = bytes(handle)
-                        else:
-                            # For cudaIpcMemHandle_t object, get raw bytes via memoryview
-                            import ctypes
-                            # cudaIpcMemHandle_t is 64 bytes
-                            handle_bytes = bytes(memoryview(handle).cast('B')[:64])
+                        # DEBUG: Print what we're getting
+                        if param_names and len(param_names) == 1:  # Only first tensor
+                            print(f"[vLLM Patch DEBUG] share_data type: {type(share_data)}", flush=True)
+                            print(f"[vLLM Patch DEBUG] share_data length: {len(share_data)}", flush=True)
+                            for i, item in enumerate(share_data):
+                                print(f"[vLLM Patch DEBUG] share_data[{i}]: type={type(item).__name__}, value={repr(item)[:100]}", flush=True)
+                        
+                        # For now, skip IPC - just debug
+                        # We'll implement proper handling once we see what the data looks like
                        
-                        ipc_handles[name] = {
-                            "handle_b64": base64.b64encode(handle_bytes).decode('ascii'),
-                            "storage_size": share_data[1],
-                            "storage_offset": tensor.storage_offset(),
-                            "shape": list(tensor.shape),
-                            "stride": list(tensor.stride()),
-                            "dtype": str(tensor.dtype),
-                            "device_index": tensor.device.index,
-                        }
                    except Exception as e:
                        print(f"[vLLM Patch] Could not get IPC handle for {name}: {e}", flush=True)
                        import traceback