diff --git a/discover/runner.go b/discover/runner.go
index cbaba3c6..e74050d0 100644
--- a/discover/runner.go
+++ b/discover/runner.go
@@ -117,7 +117,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 
 		// In the second pass, we more deeply initialize the GPUs to weed out devices that
 		// aren't supported by a given library.  We run this phase in parallel to speed up discovery.
-		slog.Debug("filtering out unsupported or overlapping GPU library combinations", "count", len(devices))
+		slog.Debug("evaluating which if any devices to filter out", "initial_count", len(devices))
 		ctx2ndPass, cancel := context.WithTimeout(ctx, 30*time.Second)
 		defer cancel()
 		var wg sync.WaitGroup
@@ -129,7 +129,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 			if devices[i].Library == "Metal" {
 				continue
 			}
-			slog.Debug("verifying GPU is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "pci_id", devices[i].PCIID)
+			slog.Debug("verifying GPU is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
 			wg.Add(1)
 			go func(i int) {
 				defer wg.Done()
@@ -155,6 +155,12 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 					envVar:           id,  // Filter to just this one GPU
 				}
 				if len(bootstrapDevices(ctx2ndPass, devices[i].LibraryPath, extraEnvs)) == 0 {
+					slog.Debug("filtering device which didn't fully initialize",
+						"id", devices[i].ID,
+						"libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
+						"pci_id", devices[i].PCIID,
+						"library", devices[i].Library,
+					)
 					needsDelete[i] = true
 				} else {
 					supportedMu.Lock()
@@ -170,7 +176,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 			}(i)
 		}
 		wg.Wait()
-		logutil.Trace("supported GPU library combinations", "supported", supported)
+		logutil.Trace("supported GPU library combinations before filtering", "supported", supported)
 
 		filterOutVulkanThatAreSupportedByOtherGPU(needsDelete)
 
@@ -372,12 +378,13 @@ func filterOutVulkanThatAreSupportedByOtherGPU(needsDelete []bool) {
 			}
 			if devices[j].PCIID == devices[i].PCIID && devices[j].Library != "Vulkan" && !needsDelete[j] {
 				needsDelete[i] = true
-				slog.Debug("dropping Vulkan duplicate by PCI ID",
-					"vulkan_id", devices[i].ID,
-					"vulkan_libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
+				slog.Debug("filtering device with duplicate PCI ID",
+					"id", devices[i].ID,
+					"library", devices[i].Library,
+					"libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
 					"pci_id", devices[i].PCIID,
-					"kept_library", devices[j].Library,
 					"kept_id", devices[j].ID,
+					"kept_library", devices[j].Library,
 				)
 				break
 			}
@@ -422,6 +429,12 @@ func filterOverlapByLibrary(supported map[string]map[string]map[string]int, need
 			}
 			for dev, i := range byLibDirs[libDir] {
 				if _, found := byLibDirs[newest][dev]; found {
+					slog.Debug("filtering device with overlapping libraries",
+						"id", dev,
+						"library", libDir,
+						"delete_index", i,
+						"kept_library", newest,
+					)
 					needsDelete[i] = true
 				}
 			}
diff --git a/discover/types.go b/discover/types.go
index b34bafd2..b1f622f4 100644
--- a/discover/types.go
+++ b/discover/types.go
@@ -3,6 +3,7 @@ package discover
 import (
 	"log/slog"
 	"path/filepath"
+	"sort"
 	"strings"
 
 	"github.com/ollama/ollama/format"
@@ -26,6 +27,7 @@ type CPU struct {
 }
 
 func LogDetails(devices []ml.DeviceInfo) {
+	sort.Sort(sort.Reverse(ml.ByFreeMemory(devices))) // Report devices in order of scheduling preference
 	for _, dev := range devices {
 		var libs []string
 		for _, dir := range dev.LibraryPath {
@@ -39,6 +41,7 @@ func LogDetails(devices []ml.DeviceInfo) {
 		}
 		slog.Info("inference compute",
 			"id", dev.ID,
+			"filtered_id", dev.FilteredID,
 			"library", dev.Library,
 			"compute", dev.Compute(),
 			"name", dev.Name,
diff --git a/llama/patches/0026-GPU-discovery-enhancements.patch b/llama/patches/0026-GPU-discovery-enhancements.patch
index 82513e34..807a4689 100644
--- a/llama/patches/0026-GPU-discovery-enhancements.patch
+++ b/llama/patches/0026-GPU-discovery-enhancements.patch
@@ -5,24 +5,33 @@ Subject: [PATCH] GPU discovery enhancements
 
 Expose more information about the devices through backend props, and leverage
 management libraries for more accurate VRAM usage reporting if available.
+
+vulkan: get GPU ID (ollama v0.11.5)
+
+Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
+
+Vulkan PCI and Memory
+
+fix vulkan PCI ID and ID handling
 ---
- ggml/include/ggml-backend.h        |  11 +
- ggml/src/CMakeLists.txt            |   2 +
- ggml/src/ggml-cuda/ggml-cuda.cu    |  74 +++++
- ggml/src/ggml-cuda/vendors/hip.h   |   3 +
- ggml/src/ggml-impl.h               |   8 +
- ggml/src/ggml-metal/ggml-metal.cpp |   2 +
- ggml/src/mem_hip.cpp               | 449 +++++++++++++++++++++++++++++
- ggml/src/mem_nvml.cpp              | 209 ++++++++++++++
- 8 files changed, 758 insertions(+)
+ ggml/include/ggml-backend.h          |   8 +
+ ggml/src/CMakeLists.txt              |   2 +
+ ggml/src/ggml-cuda/ggml-cuda.cu      |  65 ++++
+ ggml/src/ggml-cuda/vendors/hip.h     |   3 +
+ ggml/src/ggml-impl.h                 |   8 +
+ ggml/src/ggml-metal/ggml-metal.cpp   |   2 +
+ ggml/src/ggml-vulkan/ggml-vulkan.cpp | 212 +++++++++++--
+ ggml/src/mem_hip.cpp                 | 452 +++++++++++++++++++++++++++
+ ggml/src/mem_nvml.cpp                | 209 +++++++++++++
+ 9 files changed, 931 insertions(+), 30 deletions(-)
  create mode 100644 ggml/src/mem_hip.cpp
  create mode 100644 ggml/src/mem_nvml.cpp
 
 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index ba181d09d..094fc3c82 100644
+index ba181d09d..809835243 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
-@@ -169,6 +169,17 @@ extern "C" {
+@@ -169,6 +169,14 @@ extern "C" {
          const char * device_id;
          // device capabilities
          struct ggml_backend_dev_caps caps;
@@ -31,9 +40,6 @@ index ba181d09d..094fc3c82 100644
 +        int compute_major;
 +        int compute_minor;
 +        int integrated;
-+        int pci_bus_id;
-+        int pci_device_id;
-+        int pci_domain_id;
 +        const char *library;
 +        // number with which the devices are accessed (Vulkan)
 +        const char *numeric_id;
@@ -54,7 +60,7 @@ index 0609c6503..aefe43bdd 100644
  
  target_include_directories(ggml-base PRIVATE .)
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index 87c6c34a4..816597d2f 100644
+index 87c6c34a4..b075a18be 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
@@ -86,7 +92,7 @@ index 87c6c34a4..816597d2f 100644
          GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
                          id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
                          ggml_cuda_parse_uuid(prop, id).c_str());
-@@ -3484,6 +3499,14 @@ struct ggml_backend_cuda_device_context {
+@@ -3484,6 +3499,11 @@ struct ggml_backend_cuda_device_context {
      std::string description;
      std::string pci_bus_id;
      std::string id;
@@ -95,22 +101,19 @@ index 87c6c34a4..816597d2f 100644
 +    int driver_major;
 +    int driver_minor;
 +    int integrated;
-+    int pciBusID;
-+    int pciDeviceID;
-+    int pciDomainID;
  };
  
  static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
-@@ -3504,6 +3527,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
+@@ -3504,6 +3524,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
  static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
      ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
      ggml_cuda_set_device(ctx->device);
 +
 +#if defined(GGML_USE_HIP)
 +    if (ggml_hip_mgmt_init() == 0) {
-+        int status = ggml_hip_get_device_memory(ctx->pciBusID, ctx->pciDeviceID, free, total);
++        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
 +        if (status == 0) {
-+            GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
++            GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
 +            ggml_hip_mgmt_release();
 +            return;
 +        }
@@ -120,7 +123,7 @@ index 87c6c34a4..816597d2f 100644
 +    if (ggml_nvml_init() == 0) {
 +        int status = ggml_nvml_get_device_memory(ctx->id.c_str(), free, total);
 +        if (status == 0) {
-+            GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
++            GGML_LOG_DEBUG("%s device %s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, ctx->id.c_str(), *free, *total);
 +            ggml_nvml_release();
 +            return;
 +        }
@@ -130,7 +133,7 @@ index 87c6c34a4..816597d2f 100644
      CUDA_CHECK(cudaMemGetInfo(free, total));
  }
  
-@@ -3512,6 +3557,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
+@@ -3512,6 +3554,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
      return GGML_BACKEND_DEVICE_TYPE_GPU;
  }
  
@@ -138,7 +141,7 @@ index 87c6c34a4..816597d2f 100644
  static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
      ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
  
-@@ -3525,6 +3571,22 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
+@@ -3525,6 +3568,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
      // If you need the memory data, call ggml_backend_dev_memory() explicitly.
      props->memory_total = props->memory_free = 0;
  
@@ -153,15 +156,12 @@ index 87c6c34a4..816597d2f 100644
 +    props->driver_major = ctx->driver_major;
 +    props->driver_minor = ctx->driver_minor;
 +    props->integrated = ctx->integrated;
-+    props->pci_bus_id = ctx->pciBusID;
-+    props->pci_device_id = ctx->pciDeviceID;
-+    props->pci_domain_id = ctx->pciDomainID;
 +    props->library = GGML_CUDA_NAME;
 +
      bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
  #ifdef GGML_CUDA_NO_PEER_COPY
      bool events = false;
-@@ -4087,6 +4149,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+@@ -4087,6 +4143,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
          std::lock_guard<std::mutex> lock(mutex);
          if (!initialized) {
              ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context;
@@ -169,7 +169,7 @@ index 87c6c34a4..816597d2f 100644
  
              for (int i = 0; i < ggml_cuda_info().device_count; i++) {
                  ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context;
-@@ -4102,6 +4165,17 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+@@ -4102,6 +4159,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                  snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
                  dev_ctx->pci_bus_id = pci_bus_id;
  
@@ -181,9 +181,6 @@ index 87c6c34a4..816597d2f 100644
 +                dev_ctx->driver_major = driverVersion / 1000;
 +                dev_ctx->driver_minor = (driverVersion - (dev_ctx->driver_major * 1000)) / 10;
 +                dev_ctx->integrated = prop.integrated;
-+                dev_ctx->pciBusID = prop.pciBusID;
-+                dev_ctx->pciDeviceID = prop.pciDeviceID;
-+                dev_ctx->pciDomainID = prop.pciDomainID;
                  ggml_backend_dev_t dev = new ggml_backend_device {
                      /* .iface   = */ ggml_backend_cuda_device_interface,
                      /* .reg     = */ &reg,
@@ -209,7 +206,7 @@ index 1f06be80e..2f9ef2dc0 100644
  #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
  #define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
 diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
-index d0fb3bcca..80597b6ea 100644
+index d0fb3bcca..b63edd0c1 100644
 --- a/ggml/src/ggml-impl.h
 +++ b/ggml/src/ggml-impl.h
 @@ -638,6 +638,14 @@ static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx
@@ -221,7 +218,7 @@ index d0fb3bcca..80597b6ea 100644
 +GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
 +GGML_API void ggml_nvml_release();
 +GGML_API int ggml_hip_mgmt_init();
-+GGML_API int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total);
++GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
 +GGML_API void ggml_hip_mgmt_release();
 +
  #ifdef __cplusplus
@@ -247,12 +244,319 @@ index f2ff9f322..f356e4a0a 100644
      props->caps = {
          /* .async                 = */ true,
          /* .host_buffer           = */ false,
+diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+index ed83236f4..0bbcecd01 100644
+--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
++++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+@@ -231,6 +231,7 @@ class vk_memory_logger;
+ #endif
+ class vk_perf_logger;
+ static void ggml_vk_destroy_buffer(vk_buffer& buf);
++static std::string ggml_vk_get_device_id(int device);
+ 
+ static constexpr uint32_t mul_mat_vec_max_cols = 8;
+ static constexpr uint32_t p021_max_gqa_ratio = 8;
+@@ -11585,6 +11586,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_
+     snprintf(description, description_size, "%s", props.deviceName.data());
+ }
+ 
++static std::string ggml_vk_get_device_id(int device) { // returns the device UUID as 8-4-4-4-12 lowercase hex; `device` indexes enumeratePhysicalDevices()
++    ggml_vk_instance_init();
++
++    std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
++
++    vk::PhysicalDeviceProperties2 props;
++    vk::PhysicalDeviceIDProperties deviceIDProps;
++    props.pNext = &deviceIDProps; // chain the ID struct so the query below also fills deviceUUID
++    devices[device].getProperties2(&props); // NOTE(review): device is not bounds-checked against devices.size() here
++
++    const auto& uuid = deviceIDProps.deviceUUID;
++    char id[64]; // 36 formatted characters plus the terminating NUL fit within 64 bytes
++    snprintf(id, sizeof(id),
++        "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
++        uuid[0], uuid[1], uuid[2], uuid[3],
++        uuid[4], uuid[5],
++        uuid[6], uuid[7],
++        uuid[8], uuid[9],
++        uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]
++    );
++    return std::string(id);
++}
++
+ // backend interface
+ 
+ #define UNUSED GGML_UNUSED
+@@ -12391,31 +12415,103 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
+     ggml_vk_get_device_description(dev_idx, description, description_size);
+ }
+ 
+-void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
++std::string ggml_backend_vk_get_device_id(int device) {
+     GGML_ASSERT(device < (int) vk_instance.device_indices.size());
+-    GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
++    int dev_idx = vk_instance.device_indices[device];
++    return ggml_vk_get_device_id(dev_idx);
++}
++
++//////////////////////////
++
++struct ggml_backend_vk_device_context { // per-device state stored as the ggml_backend_device context for the Vulkan backend
++    size_t device; // used as an index into vk_instance.device_indices
++    std::string name; // GGML_VK_NAME followed by the device index
++    std::string description; // text returned by the get_description callback
++    bool is_integrated_gpu; // true when the device type is eIntegratedGpu
++    // Combined string id in the form "dddd:bb:dd.f" (domain:bus:device.function)
++    std::string pci_id; // empty when no PCI id could be determined
++    std::string id; // value returned by the get_id callback (formatted device UUID)
++    std::string uuid; // deviceUUID formatted as 8-4-4-4-12 hex; passed to the management-library memory lookups
++    std::string numeric_id; // decimal device index, exported as props->numeric_id
++    int major; // exported as props->compute_major; currently always set to 0
++    int minor; // exported as props->compute_minor; currently always set to 0
++    int driver_major; // currently always set to 0 (driver version parsing is still a TODO)
++    int driver_minor; // currently always set to 0 (driver version parsing is still a TODO)
++};
++
++void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
++    GGML_ASSERT(ctx->device < (int) vk_instance.device_indices.size());
++    GGML_ASSERT(ctx->device < (int) vk_instance.device_supports_membudget.size());
++
++    vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[ctx->device]];
+ 
+-    vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
+-    vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
+-    vk::PhysicalDeviceMemoryProperties2 memprops = {};
+-    bool membudget_supported = vk_instance.device_supports_membudget[device];
++    vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
++    vk::PhysicalDeviceProperties2 props2;
++    vkdev.getProperties2(&props2);
+ 
+-    if (membudget_supported) {
+-        memprops.pNext = &budgetprops;
++    if (!ctx->is_integrated_gpu)
++    {
++        // Use vendor specific management libraries for best VRAM reporting if available
++        switch (props2.properties.vendorID) {
++        case VK_VENDOR_ID_AMD:
++            if (ggml_hip_mgmt_init() == 0) {
++                int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
++                if (status == 0) {
++                    GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
++                    ggml_hip_mgmt_release();
++                    return;
++                }
++                ggml_hip_mgmt_release();
++            }
++            break;
++        case VK_VENDOR_ID_NVIDIA:
++            if (ggml_nvml_init() == 0) {
++                int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
++                if (status == 0) {
++                    GGML_LOG_DEBUG("%s device %s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, ctx->uuid.c_str(), *free, *total);
++                    ggml_nvml_release();
++                    return;
++                }
++                ggml_nvml_release();
++            }
++            break;
++        }
+     }
+-    vkdev.getMemoryProperties2(&memprops);
++    // else fall back to memory budget if supported
+ 
+-    for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
+-        const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
++    *total = 0;
++    *free = 0;
++    vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props;
++    vk::PhysicalDeviceMemoryProperties2 memprops2;
++    memprops2.pNext = &mem_budget_props;
++    vkdev.getMemoryProperties2(&memprops2);
++    for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
++        if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
++            *total += memprops2.memoryProperties.memoryHeaps[i].size;
++        } else if (ctx->is_integrated_gpu) {
++            // Include shared memory on iGPUs
++            *total += memprops2.memoryProperties.memoryHeaps[i].size;
++        }
++    }
++    for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
++        if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
++            *free += mem_budget_props.heapBudget[i];
++        } else if (ctx->is_integrated_gpu) {
++            *free += mem_budget_props.heapBudget[i];
++        }
++    }
++    if (*total > 0 && *free > 0) {
++        return;
++    } else if (*total > 0) {
++        *free = *total;
++        return;
++    }
+ 
++    // else just report the physical memory
++    for (const vk::MemoryHeap& heap : memprops2.memoryProperties.memoryHeaps) {
+         if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
+             *total = heap.size;
+-
+-            if (membudget_supported && i < budgetprops.heapUsage.size()) {
+-                *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
+-            } else {
+-                *free = heap.size;
+-            }
++            *free = heap.size;
+             break;
+         }
+     }
+@@ -12448,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
+         }
+     }
+ 
++    vk::PhysicalDeviceProperties2 props2;
+     if (!ext_support) {
+-        return "";
++        device.getProperties2(&props2);
++        if (props2.properties.vendorID != VK_VENDOR_ID_AMD) {
++            return "";
++        }
++        // AMD doesn't advertise PCI ID support but still reports it, so query anyway and reject an all-zero result
+     }
+ 
+     vk::PhysicalDeviceProperties2 props = {};
+@@ -12466,19 +12567,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
+ 
+     char pci_bus_id[16] = {};
+     snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function);
++    if (pci_domain == 0 && pci_bus == 0 && pci_device == 0 && pci_function == 0) {
++        return "";
++    }
+ 
+     return std::string(pci_bus_id);
+ }
+ 
+-//////////////////////////
+-
+-struct ggml_backend_vk_device_context {
+-    size_t device;
+-    std::string name;
+-    std::string description;
+-    bool is_integrated_gpu;
+-    std::string pci_bus_id;
+-};
++static bool ggml_backend_vk_parse_pci_bus_id(const std::string & id, int *domain, int *bus, int *device) { // splits a PCI id string into numeric parts; false if empty or malformed
++    if (id.empty()) return false;
++    unsigned int d = 0, b = 0, dev = 0, func = 0; // func is parsed for validation only; it has no output parameter
++    // Expected format: dddd:bb:dd.f (all hex)
++    int n = sscanf(id.c_str(), "%4x:%2x:%2x.%1x", &d, &b, &dev, &func);
++    if (n < 4) return false; // all four fields must have been converted
++    if (domain) *domain = (int) d; // each output pointer is optional and skipped when null
++    if (bus) *bus = (int) b;
++    if (device) *device = (int) dev;
++    return true;
++}
+ 
+ static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
+     ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
+@@ -12490,9 +12596,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
+     return ctx->description.c_str();
+ }
+ 
++static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) { // returns the id string stored in the Vulkan device context
++    ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
++    return ctx->id.c_str(); // pointer refers to ctx->id's buffer, so it is only valid while that string is alive and unmodified
++}
++
+ static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
+     ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
+-    ggml_backend_vk_get_device_memory(ctx->device, free, total);
++    ggml_backend_vk_get_device_memory(ctx, free, total);
+ }
+ 
+ static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
+@@ -12516,8 +12627,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
+ 
+     props->name        = ggml_backend_vk_device_get_name(dev);
+     props->description = ggml_backend_vk_device_get_description(dev);
++    props->id          = ggml_backend_vk_device_get_id(dev);
+     props->type        = ggml_backend_vk_device_get_type(dev);
+-    props->device_id   = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
++    props->device_id   = ctx->pci_id.empty() ? nullptr : ctx->pci_id.c_str();
+     ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
+     props->caps = {
+         /* .async                 = */ false,
+@@ -12525,6 +12637,14 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
+         /* .buffer_from_host_ptr  = */ false,
+         /* .events                = */ false,
+     };
++
++    props->compute_major = ctx->major;
++    props->compute_minor = ctx->minor;
++    props->driver_major = ctx->driver_major;
++    props->driver_minor = ctx->driver_minor;
++    props->integrated = ctx->is_integrated_gpu;
++    props->library = GGML_VK_NAME;
++    props->numeric_id = ctx->numeric_id.c_str();
+ }
+ 
+ static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
+@@ -12953,6 +13073,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
+         static std::mutex mutex;
+         std::lock_guard<std::mutex> lock(mutex);
+         if (!initialized) {
++            std::vector<vk::PhysicalDevice> vk_devices = vk_instance.instance.enumeratePhysicalDevices();
++
+             for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
+                 ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
+                 char desc[256];
+@@ -12961,12 +13083,42 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
+                 ctx->name = GGML_VK_NAME + std::to_string(i);
+                 ctx->description = desc;
+                 ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
+-                ctx->pci_bus_id = ggml_backend_vk_get_device_pci_id(i);
++                ctx->pci_id = ggml_backend_vk_get_device_pci_id(i);
++                ctx->id = ggml_backend_vk_get_device_id(i);
+                 devices.push_back(new ggml_backend_device {
+                     /* .iface   = */ ggml_backend_vk_device_i,
+                     /* .reg     = */ reg,
+                     /* .context = */ ctx,
+                 });
++
++                // Gather additional information about the device
++                int dev_idx = vk_instance.device_indices[i];
++                vk::PhysicalDeviceProperties props1;
++                vk_devices[dev_idx].getProperties(&props1);
++                vk::PhysicalDeviceProperties2 props2;
++                vk::PhysicalDeviceIDProperties device_id_props;
++                vk::PhysicalDevicePCIBusInfoPropertiesEXT  pci_bus_props;
++                vk::PhysicalDeviceDriverProperties driver_props;
++                props2.pNext = &device_id_props;
++                device_id_props.pNext = &pci_bus_props;
++                pci_bus_props.pNext = &driver_props;
++                vk_devices[dev_idx].getProperties2(&props2);
++                std::ostringstream oss;
++                oss << std::hex << std::setfill('0');
++                int byteIdx = 0;
++                for (int i = 0; i < 16; ++i, ++byteIdx) {
++                    oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
++                    if (byteIdx == 3 || byteIdx == 5 || byteIdx == 7 || byteIdx == 9) {
++                        oss << '-';
++                    }
++                }
++                ctx->uuid = oss.str();
++                ctx->major = 0;
++                ctx->minor = 0;
++                // TODO regex parse driver_props.driverInfo for an X.Y or X.Y.Z version string
++                ctx->driver_major = 0;
++                ctx->driver_minor = 0;
++                ctx->numeric_id = std::to_string(i);
+             }
+             initialized = true;
+         }
 diff --git a/ggml/src/mem_hip.cpp b/ggml/src/mem_hip.cpp
 new file mode 100644
-index 000000000..8ef19b8cf
+index 000000000..5a7f5d465
 --- /dev/null
 +++ b/ggml/src/mem_hip.cpp
-@@ -0,0 +1,449 @@
+@@ -0,0 +1,452 @@
 +#include "ggml.h"
 +
 +#ifdef _WIN32
@@ -586,7 +890,7 @@ index 000000000..8ef19b8cf
 +    if (gpus != NULL) gpus->pVtbl->Release(gpus); \
 +    if (gpu != NULL) gpu->pVtbl->Release(gpu)
 +
-+int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total) {
++int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
 +    std::lock_guard<std::mutex> lock(ggml_adlx_lock);
 +    if (adlx.handle == NULL) {
 +        GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
@@ -598,9 +902,13 @@ index 000000000..8ef19b8cf
 +    IADLXGPU* gpu = NULL;
 +    IADLXGPUMetrics *gpuMetrics = NULL;
 +    ADLX_RESULT status;
-+    // The "UniqueID" exposed in ADLX is the PCI Bus and Device IDs 
-+    adlx_int target = (pci_bus_id << 8) | (pci_device_id & 0xff);
 +
++    uint32_t pci_domain, pci_bus, pci_device, pci_function;
++    if (sscanf(id, "%04x:%02x:%02x.%x", &pci_domain, &pci_bus, &pci_device, &pci_function) != 4) {
++        // TODO - parse other formats?
++        GGML_LOG_DEBUG("%s device ID was not a PCI ID %s\n", __func__, id);
++        return ADLX_NOT_FOUND;
++    }
 +    status = adlx.sys->pVtbl->GetPerformanceMonitoringServices(adlx.sys, &perfMonitoringServices);
 +    if (ADLX_FAILED(status)) {
 +        GGML_LOG_INFO("%s GetPerformanceMonitoringServices failed %d\n", __func__, status);
@@ -623,16 +931,15 @@ index 000000000..8ef19b8cf
 +            GGML_LOG_INFO("%s %d] At_GPUList failed %d\n", __func__, crt, status);
 +            continue;
 +        }
-+        adlx_int id;
-+        status = gpu->pVtbl->UniqueId(gpu, &id);
++        adlx_int uniqueID;
++        status = gpu->pVtbl->UniqueId(gpu, &uniqueID);
 +        if (ADLX_FAILED(status)) {
 +            GGML_LOG_INFO("%s %d] UniqueId lookup failed %d\n", __func__, crt, status);
 +            gpu->pVtbl->Release(gpu);
 +            gpu = NULL;
 +            continue;
 +        }
-+        if (id != target) {
-+            GGML_LOG_DEBUG("%s %d] GPU UniqueId: %x does not match target %02x %02x\n", __func__, crt, id, pci_bus_id, pci_device_id);
++        if ((((uniqueID >> 8) & 0xff) != pci_bus) || ((uniqueID & 0xff) != pci_device)) {
 +            gpu->pVtbl->Release(gpu);
 +            gpu = NULL;
 +            continue;
@@ -695,7 +1002,7 @@ index 000000000..8ef19b8cf
 +    return -1;
 +}
 +void ggml_hip_mgmt_release() {}
-+int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total) {
++int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
 +    return -1;
 +}
 +
diff --git a/llama/patches/0029-NVML-fallback-for-unified-memory-GPUs.patch b/llama/patches/0027-NVML-fallback-for-unified-memory-GPUs.patch
similarity index 99%
rename from llama/patches/0029-NVML-fallback-for-unified-memory-GPUs.patch
rename to llama/patches/0027-NVML-fallback-for-unified-memory-GPUs.patch
index 9ba11168..ec3fdbaa 100644
--- a/llama/patches/0029-NVML-fallback-for-unified-memory-GPUs.patch
+++ b/llama/patches/0027-NVML-fallback-for-unified-memory-GPUs.patch
@@ -8,7 +8,7 @@ Subject: [PATCH] NVML fallback for unified memory GPUs
  1 file changed, 68 insertions(+), 3 deletions(-)
 
 diff --git a/ggml/src/mem_nvml.cpp b/ggml/src/mem_nvml.cpp
-index c9073cef..f473a2a2 100644
+index c9073cef0..f473a2a2c 100644
 --- a/ggml/src/mem_nvml.cpp
 +++ b/ggml/src/mem_nvml.cpp
 @@ -13,6 +13,7 @@
diff --git a/llama/patches/0027-vulkan-get-GPU-ID-ollama-v0.11.5.patch b/llama/patches/0027-vulkan-get-GPU-ID-ollama-v0.11.5.patch
deleted file mode 100644
index 997dd386..00000000
--- a/llama/patches/0027-vulkan-get-GPU-ID-ollama-v0.11.5.patch
+++ /dev/null
@@ -1,95 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Xiaodong Ye <xiaodong.ye@mthreads.com>
-Date: Mon, 18 Aug 2025 12:48:07 +0800
-Subject: [PATCH] vulkan: get GPU ID (ollama v0.11.5)
-
-Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
----
- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 37 ++++++++++++++++++++++++++++
- 1 file changed, 37 insertions(+)
-
-diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-index 061cd078..adea7783 100644
---- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-@@ -11588,6 +11588,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_
-     snprintf(description, description_size, "%s", props.deviceName.data());
- }
-
-+static std::string ggml_vk_get_device_id(int device) {
-+    ggml_vk_instance_init();
-+
-+    std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
-+
-+    vk::PhysicalDeviceProperties2 props;
-+    vk::PhysicalDeviceIDProperties deviceIDProps;
-+    props.pNext = &deviceIDProps;
-+    devices[device].getProperties2(&props);
-+
-+    const auto& uuid = deviceIDProps.deviceUUID;
-+    char id[64];
-+    snprintf(id, sizeof(id),
-+        "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
-+        uuid[0], uuid[1], uuid[2], uuid[3],
-+        uuid[4], uuid[5],
-+        uuid[6], uuid[7],
-+        uuid[8], uuid[9],
-+        uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]
-+    );
-+    return std::string(id);
-+}
-+
- // backend interface
-
- #define UNUSED GGML_UNUSED
-@@ -12394,6 +12417,12 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
-     ggml_vk_get_device_description(dev_idx, description, description_size);
- }
-
-+std::string ggml_backend_vk_get_device_id(int device) {
-+    GGML_ASSERT(device < (int) vk_instance.device_indices.size());
-+    int dev_idx = vk_instance.device_indices[device];
-+    return ggml_vk_get_device_id(dev_idx);
-+}
-+
- void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
-     GGML_ASSERT(device < (int) vk_instance.device_indices.size());
-     GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
-@@ -12481,6 +12510,7 @@ struct ggml_backend_vk_device_context {
-     std::string description;
-     bool is_integrated_gpu;
-     std::string pci_bus_id;
-+    std::string id;
- };
-
- static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
-@@ -12493,6 +12523,11 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
-     return ctx->description.c_str();
- }
-
-+static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) {
-+    ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
-+    return ctx->id.c_str();
-+}
-+
- static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
-     ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
-     ggml_backend_vk_get_device_memory(ctx->device, free, total);
-@@ -12519,6 +12554,7 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
-
-     props->name        = ggml_backend_vk_device_get_name(dev);
-     props->description = ggml_backend_vk_device_get_description(dev);
-+    props->id          = ggml_backend_vk_device_get_id(dev);
-     props->type        = ggml_backend_vk_device_get_type(dev);
-     props->device_id   = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
-     ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
-@@ -12965,6 +13001,7 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
-                 ctx->description = desc;
-                 ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
-                 ctx->pci_bus_id = ggml_backend_vk_get_device_pci_id(i);
-+                ctx->id = ggml_backend_vk_get_device_id(i);
-                 devices.push_back(new ggml_backend_device {
-                     /* .iface   = */ ggml_backend_vk_device_i,
-                     /* .reg     = */ reg,
--- 
-2.51.0
\ No newline at end of file
diff --git a/llama/patches/0030-CUDA-Changing-the-CUDA-scheduling-strategy-to-spin-1.patch b/llama/patches/0028-CUDA-Changing-the-CUDA-scheduling-strategy-to-spin-1.patch
similarity index 97%
rename from llama/patches/0030-CUDA-Changing-the-CUDA-scheduling-strategy-to-spin-1.patch
rename to llama/patches/0028-CUDA-Changing-the-CUDA-scheduling-strategy-to-spin-1.patch
index c3c7fedf..f5861a8c 100644
--- a/llama/patches/0030-CUDA-Changing-the-CUDA-scheduling-strategy-to-spin-1.patch
+++ b/llama/patches/0028-CUDA-Changing-the-CUDA-scheduling-strategy-to-spin-1.patch
@@ -28,7 +28,7 @@ Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
  1 file changed, 9 insertions(+)
 
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index 6a278b5e9..87941f872 100644
+index b075a18be..d62f412d6 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -340,6 +340,15 @@ static ggml_cuda_device_info ggml_cuda_init() {
diff --git a/llama/patches/0028-vulkan-pci-and-memory.patch b/llama/patches/0028-vulkan-pci-and-memory.patch
deleted file mode 100644
index c20ccf5c..00000000
--- a/llama/patches/0028-vulkan-pci-and-memory.patch
+++ /dev/null
@@ -1,254 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Daniel Hiltgen <daniel@ollama.com>
-Date:   Fri Sep 5 08:25:03 2025 -0700
-Subject: [PATCH] Vulkan PCI and Memory
-
----
- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 176 ++++++++++++++++++++++-----
- 1 file changed, 145 insertions(+), 31 deletions(-)
-
-diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-index adea7783..fb7204ce 100644
---- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-@@ -12423,31 +12423,99 @@ std::string ggml_backend_vk_get_device_id(int device) {
-     return ggml_vk_get_device_id(dev_idx);
- }
- 
--void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
--    GGML_ASSERT(device < (int) vk_instance.device_indices.size());
--    GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
-+//////////////////////////
-+
-+struct ggml_backend_vk_device_context {
-+    size_t device;
-+    std::string name;
-+    std::string description;
-+    bool is_integrated_gpu;
-+    // Combined string id in the form "dddd:bb:dd.f" (domain:bus:device.function)
-+    std::string pci_id;
-+    std::string id;
-+    std::string uuid;
-+    int major;
-+    int minor;
-+    int driver_major;
-+    int driver_minor;
-+    int pci_bus_id;
-+    int pci_device_id;
-+    int pci_domain_id;
-+};
-+
-+void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
-+    GGML_ASSERT(ctx->device < (int) vk_instance.device_indices.size());
-+    GGML_ASSERT(ctx->device < (int) vk_instance.device_supports_membudget.size());
-+
-+    vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[ctx->device]];
- 
--    vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
--    vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
--    vk::PhysicalDeviceMemoryProperties2 memprops = {};
--    bool membudget_supported = vk_instance.device_supports_membudget[device];
-+    vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
-+    vk::PhysicalDeviceProperties2 props2;
-+    vkdev.getProperties2(&props2);
- 
--    if (membudget_supported) {
--        memprops.pNext = &budgetprops;
-+    if (!ctx->is_integrated_gpu)
-+    {
-+        // Use vendor specific management libraries for best VRAM reporting if available
-+        switch (props2.properties.vendorID) {
-+        case VK_VENDOR_ID_AMD:
-+            if (ggml_hip_mgmt_init() == 0) {
-+                int status = ggml_hip_get_device_memory(ctx->pci_bus_id, ctx->pci_device_id, free, total);
-+                if (status == 0) {
-+                    GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
-+                    ggml_hip_mgmt_release();
-+                    return;
-+                }
-+                ggml_hip_mgmt_release();
-+            }
-+            break;
-+        case VK_VENDOR_ID_NVIDIA:
-+            if (ggml_nvml_init() == 0) {
-+                int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
-+                if (status == 0) {
-+                    GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
-+                    ggml_nvml_release();
-+                    return;
-+                }
-+                ggml_nvml_release();
-+            }
-+            break;
-+        }
-     }
--    vkdev.getMemoryProperties2(&memprops);
-+    // else fallback to memory budget if supported
- 
--    for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
--        const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
-+    *total = 0;
-+    *free = 0;
-+    vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props;
-+    vk::PhysicalDeviceMemoryProperties2 memprops2;
-+    memprops2.pNext = &mem_budget_props;
-+    vkdev.getMemoryProperties2(&memprops2);
-+    for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
-+        if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
-+            *total += memprops2.memoryProperties.memoryHeaps[i].size;
-+        } else if (ctx->is_integrated_gpu) {
-+            // Include shared memory on iGPUs
-+            *total += memprops2.memoryProperties.memoryHeaps[i].size;
-+        }
-+    }
-+    for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
-+        if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
-+            *free += mem_budget_props.heapBudget[i];
-+        } else if (ctx->is_integrated_gpu) {
-+            *free += mem_budget_props.heapBudget[i];
-+        }
-+    }
-+    if (*total > 0 && *free > 0) {
-+        return;
-+    } else if (*total > 0) {
-+        *free = *total;
-+        return;
-+    }
- 
-+    // else just report the physical memory
-+    for (const vk::MemoryHeap& heap : memprops2.memoryProperties.memoryHeaps) {
-         if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
-             *total = heap.size;
--
--            if (membudget_supported && i < budgetprops.heapUsage.size()) {
--                *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
--            } else {
--                *free = heap.size;
--            }
-+            *free = heap.size;
-             break;
-         }
-     }
-@@ -12502,16 +12570,17 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
-     return std::string(pci_bus_id);
- }
- 
--//////////////////////////
--
--struct ggml_backend_vk_device_context {
--    size_t device;
--    std::string name;
--    std::string description;
--    bool is_integrated_gpu;
--    std::string pci_bus_id;
--    std::string id;
--};
-+static bool ggml_backend_vk_parse_pci_bus_id(const std::string & id, int *domain, int *bus, int *device) {
-+    if (id.empty()) return false;
-+    unsigned int d = 0, b = 0, dev = 0, func = 0;
-+    // Expected format: dddd:bb:dd.f (all hex)
-+    int n = sscanf(id.c_str(), "%4x:%2x:%2x.%1x", &d, &b, &dev, &func);
-+    if (n < 4) return false;
-+    if (domain) *domain = (int) d;
-+    if (bus) *bus = (int) b;
-+    if (device) *device = (int) dev;
-+    return true;
-+}
- 
- static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
-     ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
-@@ -12530,7 +12599,7 @@ static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) {
- 
- static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
-     ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
--    ggml_backend_vk_get_device_memory(ctx->device, free, total);
-+    ggml_backend_vk_get_device_memory(ctx, free, total);
- }
- 
- static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
-@@ -12556,7 +12625,7 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
-     props->description = ggml_backend_vk_device_get_description(dev);
-     props->id          = ggml_backend_vk_device_get_id(dev);
-     props->type        = ggml_backend_vk_device_get_type(dev);
--    props->device_id   = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
-+    props->device_id   = ctx->pci_id.empty() ? nullptr : ctx->pci_id.c_str();
-     ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
-     props->caps = {
-         /* .async                 = */ false,
-@@ -12564,6 +12633,17 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
-         /* .buffer_from_host_ptr  = */ false,
-         /* .events                = */ false,
-     };
-+
-+    props->compute_major = ctx->major;
-+    props->compute_minor = ctx->minor;
-+    props->driver_major = ctx->driver_major;
-+    props->driver_minor = ctx->driver_minor;
-+    props->integrated = ctx->is_integrated_gpu;
-+    props->pci_bus_id = ctx->pci_bus_id;
-+    props->pci_device_id = ctx->pci_device_id;
-+    props->pci_domain_id = ctx->pci_domain_id;
-+    props->library = GGML_VK_NAME;
-+    props->numeric_id = ctx->id.empty() ? nullptr : ctx->id.c_str();
- }
- 
- static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
-@@ -12992,6 +13071,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
-         static std::mutex mutex;
-         std::lock_guard<std::mutex> lock(mutex);
-         if (!initialized) {
-+            std::vector<vk::PhysicalDevice> vk_devices = vk_instance.instance.enumeratePhysicalDevices();
-+
-             for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
-                 ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
-                 char desc[256];
-@@ -13000,13 +13081,46 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
-                 ctx->name = GGML_VK_NAME + std::to_string(i);
-                 ctx->description = desc;
-                 ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
--                ctx->pci_bus_id = ggml_backend_vk_get_device_pci_id(i);
-+                ctx->pci_id = ggml_backend_vk_get_device_pci_id(i);
-                 ctx->id = ggml_backend_vk_get_device_id(i);
-                 devices.push_back(new ggml_backend_device {
-                     /* .iface   = */ ggml_backend_vk_device_i,
-                     /* .reg     = */ reg,
-                     /* .context = */ ctx,
-                 });
-+
-+                // Gather additional information about the device
-+                int dev_idx = vk_instance.device_indices[i];
-+                vk::PhysicalDeviceProperties props1;
-+                vk_devices[dev_idx].getProperties(&props1);
-+                vk::PhysicalDeviceProperties2 props2;
-+                vk::PhysicalDeviceIDProperties device_id_props;
-+                vk::PhysicalDevicePCIBusInfoPropertiesEXT  pci_bus_props;
-+                vk::PhysicalDeviceDriverProperties driver_props;
-+                props2.pNext = &device_id_props;
-+                device_id_props.pNext = &pci_bus_props;
-+                pci_bus_props.pNext = &driver_props;
-+                vk_devices[dev_idx].getProperties2(&props2);
-+                std::ostringstream oss;
-+                oss << std::hex << std::setfill('0');
-+                oss << "GPU-";
-+                int byteIdx = 0;
-+                for (int i = 0; i < 16; ++i, ++byteIdx) {
-+                    oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
-+                    if (byteIdx == 3 || byteIdx == 5 || byteIdx == 7 || byteIdx == 9) {
-+                        oss << '-';
-+                    }
-+                }
-+                ctx->uuid = oss.str();
-+                ctx->pci_bus_id = pci_bus_props.pciBus;
-+                ctx->pci_device_id = pci_bus_props.pciDevice;
-+                ctx->pci_domain_id = pci_bus_props.pciDomain;
-+                ctx->id = std::to_string(i);
-+                ctx->major = 0;
-+                ctx->minor = 0;
-+                // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
-+                ctx->driver_major = 0;
-+                ctx->driver_minor = 0;
-             }
-             initialized = true;
-         }
--- 
-2.51.0
\ No newline at end of file
diff --git a/llama/patches/0031-report-LoadLibrary-failures.patch b/llama/patches/0029-report-LoadLibrary-failures.patch
similarity index 100%
rename from llama/patches/0031-report-LoadLibrary-failures.patch
rename to llama/patches/0029-report-LoadLibrary-failures.patch
diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go
index 64aae141..3feb5b5d 100644
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -725,7 +725,9 @@ func (b *Backend) BackendDevices() []ml.DeviceInfo {
 		if props.library != nil {
 			info.Library = C.GoString(props.library)
 		}
-		info.PCIID = fmt.Sprintf("%02x:%02x.%x", props.pci_bus_id, props.pci_device_id, props.pci_domain_id)
+		if props.device_id != nil {
+			info.PCIID = C.GoString(props.device_id)
+		}
 		info.LibraryPath = ggml.LibPaths()
 		if props.numeric_id != nil {
 			info.FilteredID = C.GoString(props.numeric_id)
diff --git a/ml/backend/ggml/ggml/include/ggml-backend.h b/ml/backend/ggml/ggml/include/ggml-backend.h
index 094fc3c8..80983524 100644
--- a/ml/backend/ggml/ggml/include/ggml-backend.h
+++ b/ml/backend/ggml/ggml/include/ggml-backend.h
@@ -174,9 +174,6 @@ extern "C" {
         int compute_major;
         int compute_minor;
         int integrated;
-        int pci_bus_id;
-        int pci_device_id;
-        int pci_domain_id;
         const char *library;
         // number with which the devices are accessed (Vulkan)
         const char *numeric_id;
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
index f9cf2d4f..d62f412d 100644
--- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -3513,9 +3513,6 @@ struct ggml_backend_cuda_device_context {
     int driver_major;
     int driver_minor;
     int integrated;
-    int pciBusID;
-    int pciDeviceID;
-    int pciDomainID;
 };
 
 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -3539,9 +3536,9 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
 
 #if defined(GGML_USE_HIP)
     if (ggml_hip_mgmt_init() == 0) {
-        int status = ggml_hip_get_device_memory(ctx->pciBusID, ctx->pciDeviceID, free, total);
+        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
         if (status == 0) {
-            GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
+            GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
             ggml_hip_mgmt_release();
             return;
         }
@@ -3551,7 +3548,7 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
     if (ggml_nvml_init() == 0) {
         int status = ggml_nvml_get_device_memory(ctx->id.c_str(), free, total);
         if (status == 0) {
-            GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
+            GGML_LOG_DEBUG("%s device %s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, ctx->id.c_str(), *free, *total);
             ggml_nvml_release();
             return;
         }
@@ -3591,9 +3588,6 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
     props->driver_major = ctx->driver_major;
     props->driver_minor = ctx->driver_minor;
     props->integrated = ctx->integrated;
-    props->pci_bus_id = ctx->pciBusID;
-    props->pci_device_id = ctx->pciDeviceID;
-    props->pci_domain_id = ctx->pciDomainID;
     props->library = GGML_CUDA_NAME;
 
     bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
@@ -4182,9 +4176,6 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                 dev_ctx->driver_major = driverVersion / 1000;
                 dev_ctx->driver_minor = (driverVersion - (dev_ctx->driver_major * 1000)) / 10;
                 dev_ctx->integrated = prop.integrated;
-                dev_ctx->pciBusID = prop.pciBusID;
-                dev_ctx->pciDeviceID = prop.pciDeviceID;
-                dev_ctx->pciDomainID = prop.pciDomainID;
                 ggml_backend_dev_t dev = new ggml_backend_device {
                     /* .iface   = */ ggml_backend_cuda_device_interface,
                     /* .reg     = */ &reg,
diff --git a/ml/backend/ggml/ggml/src/ggml-impl.h b/ml/backend/ggml/ggml/src/ggml-impl.h
index 80597b6e..b63edd0c 100644
--- a/ml/backend/ggml/ggml/src/ggml-impl.h
+++ b/ml/backend/ggml/ggml/src/ggml-impl.h
@@ -643,7 +643,7 @@ GGML_API int ggml_nvml_init();
 GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
 GGML_API void ggml_nvml_release();
 GGML_API int ggml_hip_mgmt_init();
-GGML_API int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total);
+GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
 GGML_API void ggml_hip_mgmt_release();
 
 #ifdef __cplusplus
diff --git a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 564bc4a7..0bbcecd0 100644
--- a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -231,6 +231,7 @@ class vk_memory_logger;
 #endif
 class vk_perf_logger;
 static void ggml_vk_destroy_buffer(vk_buffer& buf);
+static std::string ggml_vk_get_device_id(int device);
 
 static constexpr uint32_t mul_mat_vec_max_cols = 8;
 static constexpr uint32_t p021_max_gqa_ratio = 8;
@@ -11598,7 +11599,7 @@ static std::string ggml_vk_get_device_id(int device) {
     const auto& uuid = deviceIDProps.deviceUUID;
     char id[64];
     snprintf(id, sizeof(id),
-        "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+        "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
         uuid[0], uuid[1], uuid[2], uuid[3],
         uuid[4], uuid[5],
         uuid[6], uuid[7],
@@ -12431,13 +12432,11 @@ struct ggml_backend_vk_device_context {
     std::string pci_id;
     std::string id;
     std::string uuid;
+    std::string numeric_id;
     int major;
     int minor;
     int driver_major;
     int driver_minor;
-    int pci_bus_id;
-    int pci_device_id;
-    int pci_domain_id;
 };
 
 void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
@@ -12456,9 +12455,9 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
         switch (props2.properties.vendorID) {
         case VK_VENDOR_ID_AMD:
             if (ggml_hip_mgmt_init() == 0) {
-                int status = ggml_hip_get_device_memory(ctx->pci_bus_id, ctx->pci_device_id, free, total);
+                int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
                 if (status == 0) {
-                    GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
+                    GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
                     ggml_hip_mgmt_release();
                     return;
                 }
@@ -12469,7 +12468,7 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
             if (ggml_nvml_init() == 0) {
                 int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
                 if (status == 0) {
-                    GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
+                    GGML_LOG_DEBUG("%s device %s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, ctx->uuid.c_str(), *free, *total);
                     ggml_nvml_release();
                     return;
                 }
@@ -12545,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
         }
     }
 
+    vk::PhysicalDeviceProperties2 props2;
     if (!ext_support) {
-        return "";
+        device.getProperties2(&props2);
+        if (props2.properties.vendorID != VK_VENDOR_ID_AMD) {
+            return "";
+        }
+        // AMD doesn't claim to support PCI ID, but actually does, so try anyway and check for non-zero
     }
 
     vk::PhysicalDeviceProperties2 props = {};
@@ -12563,6 +12567,9 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
 
     char pci_bus_id[16] = {};
     snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function);
+    if (pci_domain == 0 && pci_bus == 0 && pci_device == 0 && pci_function == 0) {
+        return "";
+    }
 
     return std::string(pci_bus_id);
 }
@@ -12636,11 +12643,8 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
     props->driver_major = ctx->driver_major;
     props->driver_minor = ctx->driver_minor;
     props->integrated = ctx->is_integrated_gpu;
-    props->pci_bus_id = ctx->pci_bus_id;
-    props->pci_device_id = ctx->pci_device_id;
-    props->pci_domain_id = ctx->pci_domain_id;
     props->library = GGML_VK_NAME;
-    props->numeric_id = ctx->id.empty() ? nullptr : ctx->id.c_str();
+    props->numeric_id = ctx->numeric_id.c_str();
 }
 
 static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
@@ -13101,7 +13105,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
                 vk_devices[dev_idx].getProperties2(&props2);
                 std::ostringstream oss;
                 oss << std::hex << std::setfill('0');
-                oss << "GPU-";
                 int byteIdx = 0;
                 for (int i = 0; i < 16; ++i, ++byteIdx) {
                     oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
@@ -13110,15 +13113,12 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
                     }
                 }
                 ctx->uuid = oss.str();
-                ctx->pci_bus_id = pci_bus_props.pciBus;
-                ctx->pci_device_id = pci_bus_props.pciDevice;
-                ctx->pci_domain_id = pci_bus_props.pciDomain;
-                ctx->id = std::to_string(i);
                 ctx->major = 0;
                 ctx->minor = 0;
                 // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
                 ctx->driver_major = 0;
                 ctx->driver_minor = 0;
+                ctx->numeric_id = std::to_string(i);
             }
             initialized = true;
         }
diff --git a/ml/backend/ggml/ggml/src/mem_hip.cpp b/ml/backend/ggml/ggml/src/mem_hip.cpp
index 8ef19b8c..5a7f5d46 100644
--- a/ml/backend/ggml/ggml/src/mem_hip.cpp
+++ b/ml/backend/ggml/ggml/src/mem_hip.cpp
@@ -331,7 +331,7 @@ void ggml_hip_mgmt_release() {
     if (gpus != NULL) gpus->pVtbl->Release(gpus); \
     if (gpu != NULL) gpu->pVtbl->Release(gpu)
 
-int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total) {
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
     std::lock_guard<std::mutex> lock(ggml_adlx_lock);
     if (adlx.handle == NULL) {
         GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
@@ -343,9 +343,13 @@ int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free,
     IADLXGPU* gpu = NULL;
     IADLXGPUMetrics *gpuMetrics = NULL;
     ADLX_RESULT status;
-    // The "UniqueID" exposed in ADLX is the PCI Bus and Device IDs 
-    adlx_int target = (pci_bus_id << 8) | (pci_device_id & 0xff);
 
+    uint32_t pci_domain, pci_bus, pci_device, pci_function;
+    if (sscanf(id, "%04x:%02x:%02x.%x", &pci_domain, &pci_bus, &pci_device, &pci_function) != 4) {
+        // TODO - parse other formats?
+        GGML_LOG_DEBUG("%s device ID was not a PCI ID %s\n", __func__, id);
+        return ADLX_NOT_FOUND;
+    }
     status = adlx.sys->pVtbl->GetPerformanceMonitoringServices(adlx.sys, &perfMonitoringServices);
     if (ADLX_FAILED(status)) {
         GGML_LOG_INFO("%s GetPerformanceMonitoringServices failed %d\n", __func__, status);
@@ -368,16 +372,15 @@ int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free,
             GGML_LOG_INFO("%s %d] At_GPUList failed %d\n", __func__, crt, status);
             continue;
         }
-        adlx_int id;
-        status = gpu->pVtbl->UniqueId(gpu, &id);
+        adlx_int uniqueID;
+        status = gpu->pVtbl->UniqueId(gpu, &uniqueID);
         if (ADLX_FAILED(status)) {
             GGML_LOG_INFO("%s %d] UniqueId lookup failed %d\n", __func__, crt, status);
             gpu->pVtbl->Release(gpu);
             gpu = NULL;
             continue;
         }
-        if (id != target) {
-            GGML_LOG_DEBUG("%s %d] GPU UniqueId: %x does not match target %02x %02x\n", __func__, crt, id, pci_bus_id, pci_device_id);
+        if ((((uniqueID >> 8) & 0xff) != pci_bus) || ((uniqueID & 0xff) != pci_device)) {
             gpu->pVtbl->Release(gpu);
             gpu = NULL;
             continue;
@@ -440,7 +443,7 @@ int ggml_hip_mgmt_init() {
     return -1;
 }
 void ggml_hip_mgmt_release() {}
-int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total) {
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
     return -1;
 }
 
diff --git a/ml/device.go b/ml/device.go
index 39fba7d1..57c3976b 100644
--- a/ml/device.go
+++ b/ml/device.go
@@ -391,6 +391,10 @@ func (a DeviceInfo) Compare(b DeviceInfo) DeviceComparison {
 	if a.PCIID != b.PCIID {
 		return UniqueDevice
 	}
+	// If PCIID is empty, we have to use ID + library for uniqueness
+	if a.PCIID == "" && a.DeviceID != b.DeviceID {
+		return UniqueDevice
+	}
 	if a.Library == b.Library {
 		return SameBackendDevice
 	}
@@ -454,13 +458,13 @@ func (d DeviceInfo) updateVisibleDevicesEnv(env map[string]string) {
 	var envVar string
 	switch d.Library {
 	case "ROCm":
+		// ROCm must be filtered as it can crash the runner on unsupported devices
 		envVar = "ROCR_VISIBLE_DEVICES"
 		if runtime.GOOS != "linux" {
 			envVar = "HIP_VISIBLE_DEVICES"
 		}
-	case "Vulkan":
-		envVar = "GGML_VK_VISIBLE_DEVICES"
 	default:
+		// CUDA and Vulkan are not filtered via env var, but via scheduling decisions
 		return
 	}
 	v, existing := env[envVar]