diff --git a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch index a2539034..b7d56b0d 100644 --- a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch +++ b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch @@ -7,31 +7,31 @@ This enables matching up devices and information reported by the backend with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml). --- ggml/include/ggml-backend.h | 1 + - ggml/src/ggml-cuda/ggml-cuda.cu | 33 ++++++++++++++++++++++++++++++++ + ggml/src/ggml-cuda/ggml-cuda.cu | 39 ++++++++++++++++++++++++++++++++ ggml/src/ggml-metal/ggml-metal.m | 1 + - 3 files changed, 35 insertions(+) + 3 files changed, 41 insertions(+) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h -index 74e46716..a880df33 100644 +index 74e46716..48839339 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -152,6 +152,7 @@ extern "C" { struct ggml_backend_dev_props { const char * name; const char * description; -+ const char * uuid; ++ const char * id; size_t memory_free; size_t memory_total; enum ggml_backend_dev_type type; diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu -index cb0d8528..4c829153 100644 +index cb0d8528..d6960174 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context { int device; std::string name; std::string description; -+ std::string uuid; ++ std::string id; }; static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) { @@ -39,9 +39,9 @@ index cb0d8528..4c829153 100644 return ctx->description.c_str(); } -+static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) { ++static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) { + ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; -+ return ctx->uuid.c_str(); ++ return ctx->id.c_str(); +} + static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { @@ -51,17 +51,17 @@ index cb0d8528..4c829153 100644 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { props->name = ggml_backend_cuda_device_get_name(dev); props->description = ggml_backend_cuda_device_get_description(dev); -+ props->uuid = ggml_backend_cuda_device_get_uuid(dev); ++ props->id = ggml_backend_cuda_device_get_id(dev); props->type = ggml_backend_cuda_device_get_type(dev); ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total); -@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { +@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { CUDA_CHECK(cudaGetDeviceProperties(&prop, i)); dev_ctx->description = prop.name; + #if !defined(GGML_USE_HIP) -+ char uuid[64]; -+ snprintf(uuid, sizeof(uuid), ++ char id[64]; ++ snprintf(id, sizeof(id), + "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + (unsigned char)prop.uuid.bytes[0], + (unsigned char)prop.uuid.bytes[1], @@ -80,23 +80,29 @@ index cb0d8528..4c829153 100644 + (unsigned char)prop.uuid.bytes[14], + (unsigned char)prop.uuid.bytes[15] + ); -+ dev_ctx->uuid = uuid; ++ dev_ctx->id = id; + #else -+ dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16); ++ #ifdef _WIN32 ++ char id[16]; ++ snprintf(id, sizeof(id), "%d", i); ++ dev_ctx->id = id; ++ #else ++ dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16); ++ #endif + #endif + ggml_backend_dev_t dev = new ggml_backend_device { /* .iface = */ ggml_backend_cuda_device_interface, /* .reg = */ ®, diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m -index 1b56f858..ee4f2dcb 100644 +index 1b56f858..a9eeebc6 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { props->name = ggml_backend_metal_device_get_name(dev); props->description = ggml_backend_metal_device_get_description(dev); -+ props->uuid = "0"; ++ props->id = "0"; props->type = ggml_backend_metal_device_get_type(dev); ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total); props->caps = (struct ggml_backend_dev_caps) { diff --git a/ml/backend.go b/ml/backend.go index 61066c1a..06f9de9a 100644 --- a/ml/backend.go +++ b/ml/backend.go @@ -124,9 +124,9 @@ type DeviceMemory struct { // may not be persistent across instances of the runner. Name string - // UUID is a unique persistent identifier for the device for matching - // with system management libraries - UUID string + // ID is an identifier for the device for matching with system + // management libraries. + ID string // Weights is the per-layer memory needed for the model weights. Weights []Memory @@ -156,8 +156,8 @@ func (m DeviceMemory) LogValue() slog.Value { attrs = append(attrs, slog.Any("Graph", m.Graph)) } - if len(attrs) > 0 && m.UUID != "" { - attrs = append([]slog.Attr{slog.String("UUID", m.UUID)}, attrs...) + if len(attrs) > 0 && m.ID != "" { + attrs = append([]slog.Attr{slog.String("ID", m.ID)}, attrs...) } return slog.GroupValue(attrs...) diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index 4f1212de..680910f8 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -138,7 +138,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) { requiredMemory.CPU.Name = C.GoString(C.ggml_backend_dev_name(cpuDeviceBufferType.d)) var props C.struct_ggml_backend_dev_props C.ggml_backend_dev_get_props(cpuDeviceBufferType.d, &props) - requiredMemory.CPU.UUID = C.GoString(props.uuid) + requiredMemory.CPU.ID = C.GoString(props.id) requiredMemory.CPU.Weights = make([]ml.Memory, blocks+1) requiredMemory.CPU.Cache = make([]ml.Memory, blocks+1) @@ -155,7 +155,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) { requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d)) var props C.struct_ggml_backend_dev_props C.ggml_backend_dev_get_props(d, &props) - requiredMemory.GPUs[i].UUID = C.GoString(props.uuid) + requiredMemory.GPUs[i].ID = C.GoString(props.id) requiredMemory.GPUs[i].Weights = make([]ml.Memory, blocks+1) requiredMemory.GPUs[i].Cache = make([]ml.Memory, blocks+1) } diff --git a/ml/backend/ggml/ggml/include/ggml-backend.h b/ml/backend/ggml/ggml/include/ggml-backend.h index a880df33..48839339 100644 --- a/ml/backend/ggml/ggml/include/ggml-backend.h +++ b/ml/backend/ggml/ggml/include/ggml-backend.h @@ -152,7 +152,7 @@ extern "C" { struct ggml_backend_dev_props { const char * name; const char * description; - const char * uuid; + const char * id; size_t memory_free; size_t memory_total; enum ggml_backend_dev_type type; diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu index 9e64e5ae..2b9fabf4 100644 --- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2888,7 +2888,7 @@ struct ggml_backend_cuda_device_context { int device; std::string name; std::string description; - std::string uuid; + std::string id; }; static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) { @@ -2901,9 +2901,9 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t return ctx->description.c_str(); } -static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) { +static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) { ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; - return ctx->uuid.c_str(); + return ctx->id.c_str(); } static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { @@ -2920,7 +2920,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { props->name = ggml_backend_cuda_device_get_name(dev); props->description = ggml_backend_cuda_device_get_description(dev); - props->uuid = ggml_backend_cuda_device_get_uuid(dev); + props->id = ggml_backend_cuda_device_get_id(dev); props->type = ggml_backend_cuda_device_get_type(dev); ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total); @@ -3471,8 +3471,8 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { dev_ctx->description = prop.name; #if !defined(GGML_USE_HIP) - char uuid[64]; - snprintf(uuid, sizeof(uuid), + char id[64]; + snprintf(id, sizeof(id), "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", (unsigned char)prop.uuid.bytes[0], (unsigned char)prop.uuid.bytes[1], @@ -3491,9 +3491,15 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { (unsigned char)prop.uuid.bytes[14], (unsigned char)prop.uuid.bytes[15] ); - dev_ctx->uuid = uuid; + dev_ctx->id = id; #else - dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16); + #ifdef _WIN32 + char id[16]; + snprintf(id, sizeof(id), "%d", i); + dev_ctx->id = id; + #else + dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16); + #endif #endif ggml_backend_dev_t dev = new ggml_backend_device { diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m index f20f5615..110c9ece 100644 --- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m @@ -5726,7 +5726,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { props->name = ggml_backend_metal_device_get_name(dev); props->description = ggml_backend_metal_device_get_description(dev); - props->uuid = "0"; + props->id = "0"; props->type = ggml_backend_metal_device_get_type(dev); ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total); props->caps = (struct ggml_backend_dev_caps) {