From 2f36d769aa2db6e7bb41a0dbd079f9ce7a9bdc40 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Mon, 17 Nov 2025 15:40:58 -0800
Subject: [PATCH] bring back sysfs based VRAM information for AMD (#12871)

* build: optimize dockerfile context for iterating

This moves the copy of the source into the layer AFTER doing software
installs so we don't have to go through the RPM install for cuda, etc.
every time you touch a source file.

* amd: implement linux sysfs based VRAM lookup

This adds a C++ implementation of sysfs DRM VRAM discovery for more
accurate free VRAM data on linux for AMD GPUs.
---
 Dockerfile                                    |  14 ++-
 .../0024-GPU-discovery-enhancements.patch     | 105 +++++++++++++++---
 .../ggml/ggml/src/ggml-cuda/ggml-cuda.cu      |   2 +-
 .../ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp |   2 +-
 ml/backend/ggml/ggml/src/mem_hip.cpp          |  85 +++++++++++++-
 5 files changed, 186 insertions(+), 22 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 3a936506..c46cfe08 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -39,14 +39,14 @@ ENV CC=clang CXX=clang++
 FROM base-${TARGETARCH} AS base
 ARG CMAKEVERSION
 RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
-COPY CMakeLists.txt CMakePresets.json .
-COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 ENV LDFLAGS=-s
 
 FROM base AS cpu
 RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
 ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
 ARG PARALLEL
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CPU' \
         && cmake --build --parallel ${PARALLEL} --preset 'CPU' \
@@ -57,6 +57,8 @@ ARG CUDA11VERSION=11.8
 RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
 ENV PATH=/usr/local/cuda-11/bin:$PATH
 ARG PARALLEL
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CUDA 11' \
         && cmake --build --parallel ${PARALLEL} --preset 'CUDA 11' \
@@ -67,6 +69,8 @@ ARG CUDA12VERSION=12.8
 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
 ENV PATH=/usr/local/cuda-12/bin:$PATH
 ARG PARALLEL
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CUDA 12' \
         && cmake --build --parallel ${PARALLEL} --preset 'CUDA 12' \
@@ -78,6 +82,8 @@ ARG CUDA13VERSION=13.0
 RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-}
 ENV PATH=/usr/local/cuda-13/bin:$PATH
 ARG PARALLEL
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CUDA 13' \
         && cmake --build --parallel ${PARALLEL} --preset 'CUDA 13' \
@@ -87,6 +93,8 @@ RUN --mount=type=cache,target=/root/.ccache \
 FROM base AS rocm-6
 ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
 ARG PARALLEL
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'ROCm 6' \
         && cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \
@@ -118,6 +126,8 @@ RUN --mount=type=cache,target=/root/.ccache \
         && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
 
 FROM base AS vulkan
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'Vulkan' \
         && cmake --build --parallel --preset 'Vulkan' \
diff --git a/llama/patches/0024-GPU-discovery-enhancements.patch b/llama/patches/0024-GPU-discovery-enhancements.patch
index 9f2cdd77..5a2adf8d 100644
--- a/llama/patches/0024-GPU-discovery-enhancements.patch
+++ b/llama/patches/0024-GPU-discovery-enhancements.patch
@@ -20,10 +20,10 @@ fix vulkan PCI ID and ID handling
  ggml/src/ggml-cuda/vendors/hip.h     |   3 +
  ggml/src/ggml-impl.h                 |   8 +
  ggml/src/ggml-metal/ggml-metal.cpp   |   2 +
- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 209 +++++++++++--
- ggml/src/mem_hip.cpp                 | 452 +++++++++++++++++++++++++++
- ggml/src/mem_nvml.cpp                | 209 +++++++++++++
- 9 files changed, 926 insertions(+), 30 deletions(-)
+ ggml/src/ggml-vulkan/ggml-vulkan.cpp | 209 +++++++++--
+ ggml/src/mem_hip.cpp                 | 529 +++++++++++++++++++++++++++
+ ggml/src/mem_nvml.cpp                | 209 +++++++++++
+ 9 files changed, 1003 insertions(+), 30 deletions(-)
  create mode 100644 ggml/src/mem_hip.cpp
  create mode 100644 ggml/src/mem_nvml.cpp
 
@@ -58,7 +58,7 @@ index f9a6587f1..03f359ae9 100644
  target_include_directories(ggml-base PRIVATE .)
  
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index c9333689f..41b00af83 100644
+index c9333689f..f1a20e7fe 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
@@ -111,7 +111,7 @@ index c9333689f..41b00af83 100644
 +    if (ggml_hip_mgmt_init() == 0) {
 +        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
 +        if (status == 0) {
-+            GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
++            GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
 +            ggml_hip_mgmt_release();
 +            return;
 +        }
@@ -243,7 +243,7 @@ index 05ff6a5a6..032dee76d 100644
      /* .async                = */ true,
      /* .host_buffer          = */ false,
 diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-index 3a6bbe564..d2c278a35 100644
+index 3a6bbe564..ca02ea079 100644
 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 @@ -229,6 +229,7 @@ class vk_memory_logger;
@@ -337,7 +337,7 @@ index 3a6bbe564..d2c278a35 100644
 +    if (ggml_hip_mgmt_init() == 0) {
 +        int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
 +        if (status == 0) {
-+            GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
++            GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
 +            ggml_hip_mgmt_release();
 +            return;
 +        }
@@ -548,11 +548,12 @@ index 3a6bbe564..d2c278a35 100644
  }
 diff --git a/ggml/src/mem_hip.cpp b/ggml/src/mem_hip.cpp
 new file mode 100644
-index 000000000..5a7f5d465
+index 000000000..c1949b899
 --- /dev/null
 +++ b/ggml/src/mem_hip.cpp
-@@ -0,0 +1,452 @@
+@@ -0,0 +1,529 @@
 +#include "ggml.h"
++#include "ggml-impl.h"
 +
 +#ifdef _WIN32
 +// AMD Device Library eXtra (ADLX)
@@ -570,7 +571,6 @@ index 000000000..5a7f5d465
 +// Unused function parameters are commented out to avoid unnecessary type
 +// definitions.
 +
-+#include "ggml-impl.h"
 +#include <filesystem>
 +#include <mutex>
 +
@@ -990,15 +990,92 @@ index 000000000..5a7f5d465
 +
 +#else // #ifdef _WIN32
 +
++#include <filesystem>
++#include <fstream>
++#include <iostream>
++#include <sstream>
++#include <string>
++#include <vector>
++
++#include <glob.h>
++#include <stdint.h>
++#include <stdio.h>
++#include <string.h>
++namespace fs = std::filesystem;
++
 +extern "C" {
 +
-+// TODO Linux implementation of accurate VRAM reporting
 +int ggml_hip_mgmt_init() {
-+    return -1;
++    return 0;
 +}
 +void ggml_hip_mgmt_release() {}
 +int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
-+    return -1;
++    GGML_LOG_INFO("%s searching for device %s\n", __func__, id);
++    const std::string drmDeviceGlob = "/sys/class/drm/card*/device/uevent";
++    const std::string drmTotalMemoryFile = "mem_info_vram_total";
++    const std::string drmUsedMemoryFile = "mem_info_vram_used";
++    const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";
++
++    glob_t glob_result;
++    glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result);
++
++    for (size_t i = 0; i < glob_result.gl_pathc; ++i) {
++        const char* device_file = glob_result.gl_pathv[i];
++        std::ifstream file(device_file);
++        if (!file.is_open()) {
++            std::cerr << "Failed to open sysfs node" << std::endl;
++            globfree(&glob_result);
++            return 1;
++        }
++
++        std::string line;
++        while (std::getline(file, line)) {
++            // Check for PCI_SLOT_NAME label
++            if (line.find(drmUeventPCISlotLabel) == 0) {
++                std::istringstream iss(line.substr(drmUeventPCISlotLabel.size()));
++                std::string pciSlot;
++                iss >> pciSlot;
++                if (pciSlot == std::string(id)) {
++                    std::string dir = fs::path(device_file).parent_path().string();
++
++                    std::string totalFile = dir + "/" + drmTotalMemoryFile;
++                    std::ifstream totalFileStream(totalFile.c_str());
++                    if (!totalFileStream.is_open()) {
++                        GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, totalFile.c_str());
++                        file.close();
++                        globfree(&glob_result);
++                        return 1;
++                    }
++
++                    uint64_t memory;
++                    totalFileStream >> memory;
++                    *total = memory;
++
++                    std::string usedFile = dir + "/" + drmUsedMemoryFile;
++                    std::ifstream usedFileStream(usedFile.c_str());
++                    if (!usedFileStream.is_open()) {
++                        GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, usedFile.c_str());
++                        file.close();
++                        globfree(&glob_result);
++                        return 1;
++                    }
++
++                    uint64_t memoryUsed;
++                    usedFileStream >> memoryUsed;
++                    *free = memory - memoryUsed;
++
++                    file.close();
++                    globfree(&glob_result);
++                    return 0;
++                }
++            }
++        }
++
++        file.close();
++    }
++    GGML_LOG_DEBUG("%s unable to find matching device\n", __func__);
++    globfree(&glob_result);
++    return 1;
 +}
 +
 +} // extern "C"
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
index 41b00af8..f1a20e7f 100644
--- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -3513,7 +3513,7 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
     if (ggml_hip_mgmt_init() == 0) {
         int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
         if (status == 0) {
-            GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
+            GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
             ggml_hip_mgmt_release();
             return;
         }
diff --git a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 80185d9f..903050b0 100644
--- a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -13212,7 +13212,7 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
     if (ggml_hip_mgmt_init() == 0) {
         int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
         if (status == 0) {
-            GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
+            GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
             ggml_hip_mgmt_release();
             return;
         }
diff --git a/ml/backend/ggml/ggml/src/mem_hip.cpp b/ml/backend/ggml/ggml/src/mem_hip.cpp
index 5a7f5d46..c1949b89 100644
--- a/ml/backend/ggml/ggml/src/mem_hip.cpp
+++ b/ml/backend/ggml/ggml/src/mem_hip.cpp
@@ -1,4 +1,5 @@
 #include "ggml.h"
+#include "ggml-impl.h"
 
 #ifdef _WIN32
 // AMD Device Library eXtra (ADLX)
@@ -16,7 +17,6 @@
 // Unused function parameters are commented out to avoid unnecessary type
 // definitions.
 
-#include "ggml-impl.h"
 #include <filesystem>
 #include <mutex>
 
@@ -436,15 +436,92 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
 
 #else // #ifdef _WIN32
 
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <glob.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+namespace fs = std::filesystem;
+
 extern "C" {
 
-// TODO Linux implementation of accurate VRAM reporting
 int ggml_hip_mgmt_init() {
-    return -1;
+    return 0;
 }
 void ggml_hip_mgmt_release() {}
 int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
-    return -1;
+    GGML_LOG_INFO("%s searching for device %s\n", __func__, id);
+    const std::string drmDeviceGlob = "/sys/class/drm/card*/device/uevent";
+    const std::string drmTotalMemoryFile = "mem_info_vram_total";
+    const std::string drmUsedMemoryFile = "mem_info_vram_used";
+    const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";
+
+    glob_t glob_result;
+    glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result);
+
+    for (size_t i = 0; i < glob_result.gl_pathc; ++i) {
+        const char* device_file = glob_result.gl_pathv[i];
+        std::ifstream file(device_file);
+        if (!file.is_open()) {
+            std::cerr << "Failed to open sysfs node" << std::endl;
+            globfree(&glob_result);
+            return 1;
+        }
+
+        std::string line;
+        while (std::getline(file, line)) {
+            // Check for PCI_SLOT_NAME label
+            if (line.find(drmUeventPCISlotLabel) == 0) {
+                std::istringstream iss(line.substr(drmUeventPCISlotLabel.size()));
+                std::string pciSlot;
+                iss >> pciSlot;
+                if (pciSlot == std::string(id)) {
+                    std::string dir = fs::path(device_file).parent_path().string();
+
+                    std::string totalFile = dir + "/" + drmTotalMemoryFile;
+                    std::ifstream totalFileStream(totalFile.c_str());
+                    if (!totalFileStream.is_open()) {
+                        GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, totalFile.c_str());
+                        file.close();
+                        globfree(&glob_result);
+                        return 1;
+                    }
+
+                    uint64_t memory;
+                    totalFileStream >> memory;
+                    *total = memory;
+
+                    std::string usedFile = dir + "/" + drmUsedMemoryFile;
+                    std::ifstream usedFileStream(usedFile.c_str());
+                    if (!usedFileStream.is_open()) {
+                        GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, usedFile.c_str());
+                        file.close();
+                        globfree(&glob_result);
+                        return 1;
+                    }
+
+                    uint64_t memoryUsed;
+                    usedFileStream >> memoryUsed;
+                    *free = memory - memoryUsed;
+
+                    file.close();
+                    globfree(&glob_result);
+                    return 0;
+                }
+            }
+        }
+
+        file.close();
+    }
+    GGML_LOG_DEBUG("%s unable to find matching device\n", __func__);
+    globfree(&glob_result);
+    return 1;
 }
 
 } // extern "C"
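
For reference, the Linux path in this patch computes free VRAM as mem_info_vram_total minus mem_info_vram_used, after matching the requested device by the PCI_SLOT_NAME key in each card's uevent file. A minimal standalone sketch of the same sysfs read, separate from the patch and assuming an amdgpu device exposed as /sys/class/drm/card0 (the card index here is hypothetical; on a multi-GPU system you would match PCI_SLOT_NAME as above):

// sysfs_vram_demo.cpp: read the amdgpu VRAM counters used by the patch.
// Build: g++ -std=c++17 sysfs_vram_demo.cpp -o sysfs_vram_demo
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>

int main() {
    const std::string dir = "/sys/class/drm/card0/device/"; // hypothetical card index
    uint64_t total = 0, used = 0;
    std::ifstream(dir + "mem_info_vram_total") >> total;    // bytes of VRAM on the device
    std::ifstream(dir + "mem_info_vram_used") >> used;      // bytes currently allocated
    if (total == 0) {
        std::cerr << "no amdgpu VRAM info under " << dir << '\n';
        return 1;
    }
    std::cout << "total: " << total << " used: " << used
              << " free: " << (total - used) << '\n';
    return 0;
}

Note that mem_info_vram_total and mem_info_vram_used are amdgpu-specific sysfs nodes, which is why this lookup only serves as the AMD fallback in the CUDA and Vulkan memory-reporting paths above.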