#ifndef _WIN32
// Reads the kernel-reported "MemAvailable" figure from a meminfo-format file.
//
// Parameters:
//   meminfo_path - file to parse; defaults to "/proc/meminfo". Exposed as a
//                  parameter so the parser can be pointed at a fixture file.
//
// Returns the MemAvailable value converted from kB to bytes. Returns 0 when
// the path is null, the file cannot be opened, no line starts with
// "MemAvailable:", or the value on that line is not a number.
static size_t get_mem_available(const char * meminfo_path = "/proc/meminfo") {
    static const char   key[]   = "MemAvailable:";
    static const size_t key_len = sizeof(key) - 1;

    if (meminfo_path == nullptr) {
        return 0;
    }

    std::ifstream meminfo(meminfo_path);
    if (!meminfo.is_open()) {
        return 0;
    }

    std::string line;
    while (std::getline(meminfo, line)) {
        // Prefix comparison only: unlike find(), this never scans past the key length.
        if (line.compare(0, key_len, key) != 0) {
            continue;
        }

        // Initialise and check the conversion count so a malformed line yields 0
        // instead of an indeterminate value.
        size_t available_kb = 0;
        if (sscanf(line.c_str() + key_len, "%zu", &available_kb) != 1) {
            return 0;
        }

        // Saturate rather than wrap if kB -> bytes would overflow size_t
        // (possible where size_t is 32 bits wide).
        const size_t max_bytes = static_cast<size_t>(-1);
        if (available_kb > max_bytes / 1024) {
            return max_bytes;
        }
        return available_kb * 1024;
    }

    return 0;
}
#endif
"nvmlErrorString"); +- if (nvml.nvmlInit_v2 == NULL || nvml.nvmlShutdown == NULL || nvml.nvmlDeviceGetHandleByUUID == NULL || nvml.nvmlDeviceGetMemoryInfo == NULL) { ++ if (nvml.nvmlInit_v2 == NULL || nvml.nvmlShutdown == NULL || nvml.nvmlDeviceGetHandleByUUID == NULL || nvml.nvmlDeviceGetMemoryInfo == NULL || nvml.nvmlDeviceGetName == NULL) { + GGML_LOG_INFO("%s unable to locate required symbols in libnvidia-ml.so", __func__); + dlclose(nvml.handle); + nvml.handle = NULL; +@@ -199,10 +228,46 @@ int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total) { + } + nvmlMemory_t memInfo = {0}; + status = nvml.nvmlDeviceGetMemoryInfo(device, &memInfo); ++ + if (status == NVML_SUCCESS) { ++ // NVML working correctly, use its values + *free = memInfo.free; + *total = memInfo.total; ++ return NVML_SUCCESS; + } ++ ++#ifndef _WIN32 ++ // Handle NVML_ERROR_NOT_SUPPORTED - this indicates NVML doesn't support ++ // reporting framebuffer memory (e.g., unified memory GPUs where FB memory is 0) ++ if (status == NVML_ERROR_NOT_SUPPORTED) { ++ // Use system memory from /proc/meminfo ++ size_t mem_available = get_mem_available(); ++ size_t mem_total = 0; ++ ++ // Read MemTotal ++ std::ifstream meminfo("/proc/meminfo"); ++ if (meminfo.is_open()) { ++ std::string line; ++ while (std::getline(meminfo, line)) { ++ if (line.find("MemTotal:") == 0) { ++ size_t total_kb; ++ sscanf(line.c_str(), "MemTotal: %zu kB", &total_kb); ++ mem_total = total_kb * 1024; ++ break; ++ } ++ } ++ } ++ ++ if (mem_total > 0) { ++ *total = mem_total; ++ *free = mem_available; ++ GGML_LOG_INFO("%s NVML not supported for memory query, using system memory (total=%zu, available=%zu)\n", ++ __func__, mem_total, mem_available); ++ return NVML_SUCCESS; ++ } ++ } ++#endif ++ + return status; + } + diff --git a/ml/backend/ggml/ggml/src/mem_nvml.cpp b/ml/backend/ggml/ggml/src/mem_nvml.cpp index c9073cef..f473a2a2 100644 --- a/ml/backend/ggml/ggml/src/mem_nvml.cpp +++ 
#ifndef _WIN32
// Reads the kernel-reported "MemAvailable" figure from a meminfo-format file.
//
// Parameters:
//   meminfo_path - file to parse; defaults to "/proc/meminfo". Exposed as a
//                  parameter so the parser can be pointed at a fixture file.
//
// Returns the MemAvailable value converted from kB to bytes. Returns 0 when
// the path is null, the file cannot be opened, no line starts with
// "MemAvailable:", or the value on that line is not a number.
static size_t get_mem_available(const char * meminfo_path = "/proc/meminfo") {
    static const char   key[]   = "MemAvailable:";
    static const size_t key_len = sizeof(key) - 1;

    if (meminfo_path == nullptr) {
        return 0;
    }

    std::ifstream meminfo(meminfo_path);
    if (!meminfo.is_open()) {
        return 0;
    }

    std::string line;
    while (std::getline(meminfo, line)) {
        // Prefix comparison only: unlike find(), this never scans past the key length.
        if (line.compare(0, key_len, key) != 0) {
            continue;
        }

        // Initialise and check the conversion count so a malformed line yields 0
        // instead of an indeterminate value.
        size_t available_kb = 0;
        if (sscanf(line.c_str() + key_len, "%zu", &available_kb) != 1) {
            return 0;
        }

        // Saturate rather than wrap if kB -> bytes would overflow size_t
        // (possible where size_t is 32 bits wide).
        const size_t max_bytes = static_cast<size_t>(-1);
        if (available_kb > max_bytes / 1024) {
            return max_bytes;
        }
        return available_kb * 1024;
    }

    return 0;
}
#endif
(*)(nvmlReturn_enum)) GetProcAddress((HMODULE)(nvml.handle), "nvmlErrorString"); - if (nvml.nvmlInit_v2 == NULL || nvml.nvmlShutdown == NULL || nvml.nvmlDeviceGetHandleByUUID == NULL || nvml.nvmlDeviceGetMemoryInfo == NULL || nvml.nvmlErrorString == NULL) { + if (nvml.nvmlInit_v2 == NULL || nvml.nvmlShutdown == NULL || nvml.nvmlDeviceGetHandleByUUID == NULL || nvml.nvmlDeviceGetMemoryInfo == NULL || nvml.nvmlDeviceGetName == NULL || nvml.nvmlErrorString == NULL) { GGML_LOG_INFO("%s unable to locate required symbols in NVML.dll", __func__); FreeLibrary((HMODULE)(nvml.handle)); nvml.handle = NULL; @@ -151,8 +179,9 @@ int ggml_nvml_init() { nvml.nvmlShutdown = (nvmlReturn_enum (*)()) dlsym(nvml.handle, "nvmlShutdown"); nvml.nvmlDeviceGetHandleByUUID = (nvmlReturn_t (*)(const char *, nvmlDevice_t *)) dlsym(nvml.handle, "nvmlDeviceGetHandleByUUID"); nvml.nvmlDeviceGetMemoryInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlMemory_t *)) dlsym(nvml.handle, "nvmlDeviceGetMemoryInfo"); + nvml.nvmlDeviceGetName = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) dlsym(nvml.handle, "nvmlDeviceGetName"); nvml.nvmlErrorString = (const char * (*)(nvmlReturn_enum)) dlsym(nvml.handle, "nvmlErrorString"); - if (nvml.nvmlInit_v2 == NULL || nvml.nvmlShutdown == NULL || nvml.nvmlDeviceGetHandleByUUID == NULL || nvml.nvmlDeviceGetMemoryInfo == NULL) { + if (nvml.nvmlInit_v2 == NULL || nvml.nvmlShutdown == NULL || nvml.nvmlDeviceGetHandleByUUID == NULL || nvml.nvmlDeviceGetMemoryInfo == NULL || nvml.nvmlDeviceGetName == NULL) { GGML_LOG_INFO("%s unable to locate required symbols in libnvidia-ml.so", __func__); dlclose(nvml.handle); nvml.handle = NULL; @@ -199,10 +228,46 @@ int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total) { } nvmlMemory_t memInfo = {0}; status = nvml.nvmlDeviceGetMemoryInfo(device, &memInfo); + if (status == NVML_SUCCESS) { + // NVML working correctly, use its values *free = memInfo.free; *total = memInfo.total; + return NVML_SUCCESS; } + 
+#ifndef _WIN32 + // Handle NVML_ERROR_NOT_SUPPORTED - this indicates NVML doesn't support + // reporting framebuffer memory (e.g., unified memory GPUs where FB memory is 0) + if (status == NVML_ERROR_NOT_SUPPORTED) { + // Use system memory from /proc/meminfo + size_t mem_available = get_mem_available(); + size_t mem_total = 0; + + // Read MemTotal + std::ifstream meminfo("/proc/meminfo"); + if (meminfo.is_open()) { + std::string line; + while (std::getline(meminfo, line)) { + if (line.find("MemTotal:") == 0) { + size_t total_kb; + sscanf(line.c_str(), "MemTotal: %zu kB", &total_kb); + mem_total = total_kb * 1024; + break; + } + } + } + + if (mem_total > 0) { + *total = mem_total; + *free = mem_available; + GGML_LOG_INFO("%s NVML not supported for memory query, using system memory (total=%zu, available=%zu)\n", + __func__, mem_total, mem_available); + return NVML_SUCCESS; + } + } +#endif + return status; }