Mirror of https://github.com/likelovewant/ollama-for-amd.git (synced 2025-12-21 22:33:56 +00:00)
Some AMD GPUs do not provide UUIDs and report only "XX". In these cases, we fall back to the ordinal ID as an alternate identifier, just as we already have to do on Windows for AMD. In addition, the ID for each GPU is now printed during enumeration to make future debugging easier.
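
For context, the fallback described above boils down to a small amount of logic. The sketch below is illustrative only (it is not part of the patch, and the helper name is made up); the real implementation is ggml_cuda_parse_uuid in the diff further down:

    // Sketch only: prefer the driver-reported UUID bytes when they are usable,
    // otherwise fall back to the ordinal index, as already done on Windows for AMD.
    #include <cstdio>
    #include <string>

    static std::string device_id_or_ordinal(const unsigned char uuid[16], bool has_uuid, int ordinal) {
        char id[64];
        if (has_uuid) {
            // Format the raw UUID bytes the way nvidia-smi prints them.
            snprintf(id, sizeof(id),
                "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                uuid[0], uuid[1], uuid[2],  uuid[3],  uuid[4],  uuid[5],  uuid[6],  uuid[7],
                uuid[8], uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]);
        } else {
            // GPUs that report no usable UUID (e.g. only "XX") get the ordinal as their ID.
            snprintf(id, sizeof(id), "%d", ordinal);
        }
        return id;
    }
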
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@ollama.com>
Date: Thu, 24 Apr 2025 14:48:51 -0700
Subject: [PATCH] ggml: Export GPU UUIDs

This enables matching up devices and information reported by the backend
with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
---
 ggml/include/ggml-backend.h      |  1 +
 ggml/src/ggml-cuda/ggml-cuda.cu  | 67 +++++++++++++++++++++++++++++---
 ggml/src/ggml-metal/ggml-metal.m |  1 +
 3 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 74e467163..48839339d 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -152,6 +152,7 @@ extern "C" {
     struct ggml_backend_dev_props {
         const char * name;
         const char * description;
+        const char * id;
         size_t memory_free;
         size_t memory_total;
         enum ggml_backend_dev_type type;
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index cb0d8528d..1492368de 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -173,6 +173,51 @@ static int ggml_cuda_parse_id(char devName[]) {
 }
 #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)

+static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) {
+    char id[64];
+
+    #if !defined(GGML_USE_HIP)
+    snprintf(id, sizeof(id),
+        "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+        (unsigned char)prop.uuid.bytes[0],
+        (unsigned char)prop.uuid.bytes[1],
+        (unsigned char)prop.uuid.bytes[2],
+        (unsigned char)prop.uuid.bytes[3],
+        (unsigned char)prop.uuid.bytes[4],
+        (unsigned char)prop.uuid.bytes[5],
+        (unsigned char)prop.uuid.bytes[6],
+        (unsigned char)prop.uuid.bytes[7],
+        (unsigned char)prop.uuid.bytes[8],
+        (unsigned char)prop.uuid.bytes[9],
+        (unsigned char)prop.uuid.bytes[10],
+        (unsigned char)prop.uuid.bytes[11],
+        (unsigned char)prop.uuid.bytes[12],
+        (unsigned char)prop.uuid.bytes[13],
+        (unsigned char)prop.uuid.bytes[14],
+        (unsigned char)prop.uuid.bytes[15]
+    );
+    #else
+    #ifdef _WIN32
+    snprintf(id, sizeof(id), "%d", device_num);
+    #else
+    try {
+        std::string uuid = std::string(prop.uuid.bytes, 16);
+
+        size_t pos = 0;
+        unsigned long long v = stoull(uuid, &pos, 16);
+        if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-'))
+            throw std::invalid_argument("invalid uuid");
+
+        snprintf(id, sizeof(id), "GPU-%016llx", v);
+    } catch (const std::exception &e) {
+        snprintf(id, sizeof(id), "%d", device_num);
+    }
+    #endif
+    #endif
+
+    return id;
+}
+
 static ggml_cuda_device_info ggml_cuda_init() {
 #ifdef __HIP_PLATFORM_AMD__
     // Workaround for a rocBLAS bug when using multiple graphics cards:
@@ -261,22 +306,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
                 info.devices[id].cc += prop.minor * 0x10;
             }
         }
-        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
+        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
                       id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
-                      device_vmm ? "yes" : "no", prop.warpSize);
+                      device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
 #elif defined(GGML_USE_MUSA)
         // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
         info.devices[id].warp_size = 32;
         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
         info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
         info.devices[id].cc += prop.minor * 0x10;
-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
-                      id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
+                      id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
+                      ggml_cuda_parse_uuid(prop, id).c_str());
 #else
         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
         info.devices[id].cc = 100*prop.major + 10*prop.minor;
-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
-                      id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
+                      id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
+                      ggml_cuda_parse_uuid(prop, id).c_str());
 #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
     }

@@ -2884,6 +2931,7 @@ struct ggml_backend_cuda_device_context {
     int device;
     std::string name;
     std::string description;
+    std::string id;
 };

 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2896,6 +2944,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
     return ctx->description.c_str();
 }

+static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
+    ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
+    return ctx->id.c_str();
+}
+
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
     ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
     ggml_cuda_set_device(ctx->device);
@@ -2910,6 +2963,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
     props->name = ggml_backend_cuda_device_get_name(dev);
     props->description = ggml_backend_cuda_device_get_description(dev);
+    props->id = ggml_backend_cuda_device_get_id(dev);
     props->type = ggml_backend_cuda_device_get_type(dev);
     ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);

@@ -3457,6 +3511,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
             cudaDeviceProp prop;
             CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
             dev_ctx->description = prop.name;
+            dev_ctx->id = ggml_cuda_parse_uuid(prop, i);

             ggml_backend_dev_t dev = new ggml_backend_device {
                 /* .iface = */ ggml_backend_cuda_device_interface,
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
index 1b56f858c..a9eeebc6a 100644
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
     props->name = ggml_backend_metal_device_get_name(dev);
     props->description = ggml_backend_metal_device_get_description(dev);
+    props->id = "0";
     props->type = ggml_backend_metal_device_get_type(dev);
     ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
     props->caps = (struct ggml_backend_dev_caps) {
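
Usage note (not part of the patch): once a backend fills in the new id field, a caller can enumerate devices through the ggml-backend registry and line the IDs up with nvidia-smi -L or NVML output. Below is a minimal sketch, assuming the standard ggml_backend_dev_count / ggml_backend_dev_get / ggml_backend_dev_get_props entry points; backends other than CUDA and Metal do not set id in this patch, so the field is guarded here:

    // Sketch only: print each registered device with the exported ID so the
    // output can be matched against nvidia-smi/nvml by UUID (or ordinal on AMD).
    #include <cstdio>
    #include "ggml-backend.h"

    int main(void) {
        for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            struct ggml_backend_dev_props props = {};   // zero-init: not every backend sets .id
            ggml_backend_dev_get_props(dev, &props);
            printf("device %zu: %s (%s), id=%s\n",
                   i, props.name, props.description, props.id ? props.id : "n/a");
        }
        return 0;
    }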