Update GGML to b6646 (#12245)

Notable EOLs with this change: - MacOS v12 and v13 are no longer supported (v14+ required) - AMD gfx900 and gfx906 are no longer supported
2025-12-22 06:43:57 +00:00 · 2025-10-02 14:47:10 -07:00
parent fdb109469f
commit c68f367ef6
326 changed files with 30615 additions and 20624 deletions
--- a/llama/patches/0015-ggml-Export-GPU-UUIDs.patch
+++ b/llama/patches/0015-ggml-Export-GPU-UUIDs.patch
@@ -6,28 +6,28 @@ Subject: [PATCH] ggml: Export GPU UUIDs
 This enables matching up devices and information reported by the backend
 with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
 ---
- ggml/include/ggml-backend.h      |  1 +
- ggml/src/ggml-cuda/ggml-cuda.cu  | 67 +++++++++++++++++++++++++++++---
- ggml/src/ggml-metal/ggml-metal.m |  1 +
+ ggml/include/ggml-backend.h        |  1 +
+ ggml/src/ggml-cuda/ggml-cuda.cu    | 67 +++++++++++++++++++++++++++---
+ ggml/src/ggml-metal/ggml-metal.cpp |  1 +
 3 files changed, 63 insertions(+), 6 deletions(-)

 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index 8a91b381..9424394e 100644
+index fe20dca3..48777212 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
-@@ -152,6 +152,7 @@ extern "C" {
-     struct ggml_backend_dev_props {
-         const char * name;
+@@ -158,6 +158,7 @@ extern "C" {
         const char * description;
-+        const char * id;
+         // device free memory in bytes
         size_t memory_free;
+        const char * id;
+         // device total memory in bytes
         size_t memory_total;
-         enum ggml_backend_dev_type type;
+         // device type
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index 37ee2a6d..57eae461 100644
+index fdf8c63d..ad389ece 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
-@@ -179,6 +179,51 @@ static int ggml_cuda_parse_id(char devName[]) {
+@@ -183,6 +183,51 @@ static int ggml_cuda_parse_id(char devName[]) {
 }
 #endif // defined(GGML_USE_HIP)
 
@@ -77,9 +77,9 @@ index 37ee2a6d..57eae461 100644
 +}
 +
 static ggml_cuda_device_info ggml_cuda_init() {
- #if defined(GGML_USE_HIP)
-     // Workaround for a rocBLAS bug when using multiple graphics cards:
-@@ -267,22 +312,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
+     ggml_cuda_device_info info = {};
+ 
+@@ -249,22 +294,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
                 info.devices[id].cc += prop.minor * 0x10;
             }
         }
@@ -107,18 +107,18 @@ index 37ee2a6d..57eae461 100644
 +        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
 +                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
 +                        ggml_cuda_parse_uuid(prop, id).c_str());
- #endif // defined(GGML_USE_HIP)
-     }
- 
-@@ -3144,6 +3191,7 @@ struct ggml_backend_cuda_device_context {
-     int device;
+         std::string device_name(prop.name);
+         if (device_name == "NVIDIA GeForce MX450") {
+             turing_devices_without_mma.push_back({ id, device_name });
+@@ -3273,6 +3320,7 @@ struct ggml_backend_cuda_device_context {
     std::string name;
     std::string description;
+     std::string pci_bus_id;
 +    std::string id;
 };
 
 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
-@@ -3156,6 +3204,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
+@@ -3285,6 +3333,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
     return ctx->description.c_str();
 }
 
@@ -130,31 +130,31 @@ index 37ee2a6d..57eae461 100644
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
     ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
     ggml_cuda_set_device(ctx->device);
-@@ -3170,6 +3223,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
- static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
+@@ -3301,6 +3354,7 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
+ 
     props->name        = ggml_backend_cuda_device_get_name(dev);
     props->description = ggml_backend_cuda_device_get_description(dev);
 +    props->id          = ggml_backend_cuda_device_get_id(dev);
     props->type        = ggml_backend_cuda_device_get_type(dev);
+     props->device_id   = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
     ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
- 
-@@ -3767,6 +3821,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+@@ -3871,6 +3925,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                 cudaDeviceProp prop;
                 CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                 dev_ctx->description = prop.name;
 +                dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
 
-                 ggml_backend_dev_t dev = new ggml_backend_device {
-                     /* .iface   = */ ggml_backend_cuda_device_interface,
-diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 7bccc7bf..fe7b2f0a 100644
--- a/ggml/src/ggml-metal/ggml-metal.m
-+++ b/ggml/src/ggml-metal/ggml-metal.m
-@@ -6522,6 +6522,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
- static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+                 char pci_bus_id[16] = {};
+                 snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
+diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp
+index 909e17de..08ab4fc9 100644
+--- a/ggml/src/ggml-metal/ggml-metal.cpp
+++ b/ggml/src/ggml-metal/ggml-metal.cpp
+@@ -538,6 +538,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
+ static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
     props->name        = ggml_backend_metal_device_get_name(dev);
     props->description = ggml_backend_metal_device_get_description(dev);
 +    props->id          = "0";
     props->type        = ggml_backend_metal_device_get_type(dev);
+ 
     ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
-     props->caps = (struct ggml_backend_dev_caps) {