diff --git a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
index a2539034..b7d56b0d 100644
--- a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
+++ b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
@@ -7,31 +7,31 @@ This enables matching up devices and information reported by the backend
 with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
 ---
  ggml/include/ggml-backend.h      |  1 +
- ggml/src/ggml-cuda/ggml-cuda.cu  | 33 ++++++++++++++++++++++++++++++++
+ ggml/src/ggml-cuda/ggml-cuda.cu  | 39 ++++++++++++++++++++++++++++++++
  ggml/src/ggml-metal/ggml-metal.m |  1 +
- 3 files changed, 35 insertions(+)
+ 3 files changed, 41 insertions(+)
 
 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index 74e46716..a880df33 100644
+index 74e46716..48839339 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
 @@ -152,6 +152,7 @@ extern "C" {
      struct ggml_backend_dev_props {
          const char * name;
          const char * description;
-+        const char * uuid;
++        const char * id;
          size_t memory_free;
          size_t memory_total;
          enum ggml_backend_dev_type type;
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index cb0d8528..4c829153 100644
+index cb0d8528..d6960174 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
      int device;
      std::string name;
      std::string description;
-+    std::string uuid;
++    std::string id;
  };
  
  static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -39,9 +39,9 @@ index cb0d8528..4c829153 100644
      return ctx->description.c_str();
  }
  
-+static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
++static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
 +    ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
-+    return ctx->uuid.c_str();
++    return ctx->id.c_str();
 +}
 +
  static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
@@ -51,17 +51,17 @@ index cb0d8528..4c829153 100644
  static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
      props->name        = ggml_backend_cuda_device_get_name(dev);
      props->description = ggml_backend_cuda_device_get_description(dev);
-+    props->uuid        = ggml_backend_cuda_device_get_uuid(dev);
++    props->id          = ggml_backend_cuda_device_get_id(dev);
      props->type        = ggml_backend_cuda_device_get_type(dev);
      ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
  
-@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                  CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                  dev_ctx->description = prop.name;
  
 +                #if !defined(GGML_USE_HIP)
-+                char uuid[64];
-+                snprintf(uuid, sizeof(uuid),
++                char id[64];
++                snprintf(id, sizeof(id),
 +                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
 +                    (unsigned char)prop.uuid.bytes[0],
 +                    (unsigned char)prop.uuid.bytes[1],
@@ -80,23 +80,29 @@ index cb0d8528..4c829153 100644
 +                    (unsigned char)prop.uuid.bytes[14],
 +                    (unsigned char)prop.uuid.bytes[15]
 +                  );
-+                dev_ctx->uuid = uuid;
++                dev_ctx->id = id;
 +                #else
-+                dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
++                #ifdef _WIN32
++                char id[16];
++                snprintf(id, sizeof(id), "%d", i);
++                dev_ctx->id = id;
++                #else
++                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
++                #endif
 +                #endif
 +
                  ggml_backend_dev_t dev = new ggml_backend_device {
                      /* .iface   = */ ggml_backend_cuda_device_interface,
                      /* .reg     = */ &reg,
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 1b56f858..ee4f2dcb 100644
+index 1b56f858..a9eeebc6 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
 @@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
  static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
      props->name        = ggml_backend_metal_device_get_name(dev);
      props->description = ggml_backend_metal_device_get_description(dev);
-+    props->uuid        = "0";
++    props->id          = "0";
      props->type        = ggml_backend_metal_device_get_type(dev);
      ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
      props->caps = (struct ggml_backend_dev_caps) {
diff --git a/ml/backend.go b/ml/backend.go
index 61066c1a..06f9de9a 100644
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -124,9 +124,9 @@ type DeviceMemory struct {
 	// may not be persistent across instances of the runner.
 	Name string
 
-	// UUID is a unique persistent identifier for the device for matching
-	// with system management libraries
-	UUID string
+	// ID is an identifier for the device for matching with system
+	// management libraries.
+	ID string
 
 	// Weights is the per-layer memory needed for the model weights.
 	Weights []Memory
@@ -156,8 +156,8 @@ func (m DeviceMemory) LogValue() slog.Value {
 		attrs = append(attrs, slog.Any("Graph", m.Graph))
 	}
 
-	if len(attrs) > 0 && m.UUID != "" {
-		attrs = append([]slog.Attr{slog.String("UUID", m.UUID)}, attrs...)
+	if len(attrs) > 0 && m.ID != "" {
+		attrs = append([]slog.Attr{slog.String("ID", m.ID)}, attrs...)
 	}
 
 	return slog.GroupValue(attrs...)
diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go
index 4f1212de..680910f8 100644
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -138,7 +138,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 	requiredMemory.CPU.Name = C.GoString(C.ggml_backend_dev_name(cpuDeviceBufferType.d))
 	var props C.struct_ggml_backend_dev_props
 	C.ggml_backend_dev_get_props(cpuDeviceBufferType.d, &props)
-	requiredMemory.CPU.UUID = C.GoString(props.uuid)
+	requiredMemory.CPU.ID = C.GoString(props.id)
 	requiredMemory.CPU.Weights = make([]ml.Memory, blocks+1)
 	requiredMemory.CPU.Cache = make([]ml.Memory, blocks+1)
 
@@ -155,7 +155,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 		requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d))
 		var props C.struct_ggml_backend_dev_props
 		C.ggml_backend_dev_get_props(d, &props)
-		requiredMemory.GPUs[i].UUID = C.GoString(props.uuid)
+		requiredMemory.GPUs[i].ID = C.GoString(props.id)
 		requiredMemory.GPUs[i].Weights = make([]ml.Memory, blocks+1)
 		requiredMemory.GPUs[i].Cache = make([]ml.Memory, blocks+1)
 	}
diff --git a/ml/backend/ggml/ggml/include/ggml-backend.h b/ml/backend/ggml/ggml/include/ggml-backend.h
index a880df33..48839339 100644
--- a/ml/backend/ggml/ggml/include/ggml-backend.h
+++ b/ml/backend/ggml/ggml/include/ggml-backend.h
@@ -152,7 +152,7 @@ extern "C" {
     struct ggml_backend_dev_props {
         const char * name;
         const char * description;
-        const char * uuid;
+        const char * id;
         size_t memory_free;
         size_t memory_total;
         enum ggml_backend_dev_type type;
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
index 9e64e5ae..2b9fabf4 100644
--- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2888,7 +2888,7 @@ struct ggml_backend_cuda_device_context {
     int device;
     std::string name;
     std::string description;
-    std::string uuid;
+    std::string id;
 };
 
 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2901,9 +2901,9 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
     return ctx->description.c_str();
 }
 
-static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
+static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
     ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
-    return ctx->uuid.c_str();
+    return ctx->id.c_str();
 }
 
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
@@ -2920,7 +2920,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
     props->name        = ggml_backend_cuda_device_get_name(dev);
     props->description = ggml_backend_cuda_device_get_description(dev);
-    props->uuid        = ggml_backend_cuda_device_get_uuid(dev);
+    props->id          = ggml_backend_cuda_device_get_id(dev);
     props->type        = ggml_backend_cuda_device_get_type(dev);
     ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
 
@@ -3471,8 +3471,8 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                 dev_ctx->description = prop.name;
 
                 #if !defined(GGML_USE_HIP)
-                char uuid[64];
-                snprintf(uuid, sizeof(uuid),
+                char id[64];
+                snprintf(id, sizeof(id),
                     "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                     (unsigned char)prop.uuid.bytes[0],
                     (unsigned char)prop.uuid.bytes[1],
@@ -3491,9 +3491,15 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                     (unsigned char)prop.uuid.bytes[14],
                     (unsigned char)prop.uuid.bytes[15]
                   );
-                dev_ctx->uuid = uuid;
+                dev_ctx->id = id;
                 #else
-                dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
+                #ifdef _WIN32
+                char id[16];
+                snprintf(id, sizeof(id), "%d", i);
+                dev_ctx->id = id;
+                #else
+                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
+                #endif
                 #endif
 
                 ggml_backend_dev_t dev = new ggml_backend_device {
diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
index f20f5615..110c9ece 100644
--- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
+++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
@@ -5726,7 +5726,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
     props->name        = ggml_backend_metal_device_get_name(dev);
     props->description = ggml_backend_metal_device_get_description(dev);
-    props->uuid        = "0";
+    props->id          = "0";
     props->type        = ggml_backend_metal_device_get_type(dev);
     ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
     props->caps = (struct ggml_backend_dev_caps) {