mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 22:33:56 +00:00
This revamps how we discover GPUs in the system by leveraging the Ollama runner. This should eliminate inconsistency between our GPU discovery and the runner's capabilities at runtime, particularly for cases where we try to filter out unsupported GPUs. Now the runner does that implicitly based on the actual device list. In some cases free VRAM reporting can be unreliable, which can lead to scheduling mistakes, so this also includes a patch to leverage more reliable VRAM reporting libraries if available. Automatic workarounds have been removed as only one GPU leveraged this, which is now documented. This GPU will soon fall off the support matrix with the next ROCm bump. Additional cleanup of the scheduler and discovery packages can be done in the future once we have switched on the new memory management code, and removed support for the llama runner.
142 lines
5.1 KiB
CMake
142 lines
5.1 KiB
CMake
# Top-level project declaration. cmake_minimum_required() must come first
# because it establishes the policy defaults for everything below.
cmake_minimum_required(VERSION 3.21)

project(Ollama LANGUAGES C CXX)

# Standard modules: GNUInstallDirs defines the CMAKE_INSTALL_<dir> variables,
# CheckLanguage provides check_language(), which is used further down to probe
# for optional CUDA and HIP compilers.
include(GNUInstallDirs)
include(CheckLanguage)

# A usable threading library is mandatory.
find_package(Threads REQUIRED)
|
|
|
|
# Default to an optimized Release build, but honor a build type the user
# selected explicitly (e.g. -DCMAKE_BUILD_TYPE=Debug). Previously the value
# was overwritten unconditionally, so any user choice was silently ignored.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# Libraries declared without an explicit STATIC/SHARED type become shared.
set(BUILD_SHARED_LIBS ON)

# Require ISO C++17 with compiler-specific language extensions disabled.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
|
|
|
|
# GGML configuration switches. These are plain (non-cache) variables;
# presumably they are read by the ggml sources added via add_subdirectory()
# later in this file -- verify against the ggml CMake files.

# Boolean switches that are all turned on.
foreach(ggml_switch IN ITEMS
        GGML_BUILD
        GGML_SHARED
        GGML_CCACHE
        GGML_BACKEND_DL
        GGML_BACKEND_SHARED
        GGML_LLAMAFILE
        GGML_CUDA_GRAPHS
        GGML_CUDA_FA)
    set(${ggml_switch} ON)
endforeach()

# Settings that carry a value rather than an on/off state.
set(GGML_SCHED_MAX_COPIES 4)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_CUDA_COMPRESSION_MODE default)
|
|
|
|
# Turn on GGML_CPU_ALL_VARIANTS for non-Arm builds.
#  * When CMAKE_OSX_ARCHITECTURES is set, it decides: anything not matching
#    "arm64" enables the variants.
#  * Otherwise the decision falls back to CMAKE_SYSTEM_PROCESSOR.
if(CMAKE_OSX_ARCHITECTURES)
    if(NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
        set(GGML_CPU_ALL_VARIANTS ON)
    endif()
elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+")
    set(GGML_CPU_ALL_VARIANTS ON)
endif()

# For x86_64 macOS builds, use "@loader_path" as both the build-tree and the
# install rpath.
if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
    foreach(rpath_variable IN ITEMS CMAKE_BUILD_RPATH CMAKE_INSTALL_RPATH)
        set(${rpath_variable} "@loader_path")
    endforeach()
endif()
|
|
|
|
# Locations for built and installed artifacts. OLLAMA_RUNNER_DIR is not set in
# this file; if it is empty the install directory simply ends in "lib/ollama/".
set(OLLAMA_BUILD_DIR "${CMAKE_BINARY_DIR}/lib/ollama")
set(OLLAMA_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/lib/ollama/${OLLAMA_RUNNER_DIR}")

# Route runtime and library outputs to OLLAMA_BUILD_DIR, both for the generic
# variables and for the per-configuration Debug/Release variants, so the
# location is the same regardless of configuration.
foreach(output_dir_variable IN ITEMS
        CMAKE_RUNTIME_OUTPUT_DIRECTORY
        CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG
        CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE
        CMAKE_LIBRARY_OUTPUT_DIRECTORY
        CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG
        CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE)
    set(${output_dir_variable} "${OLLAMA_BUILD_DIR}")
endforeach()
|
|
|
|
# Directory-scoped include paths. They are declared here, before any
# add_subdirectory() call, so that targets created in subdirectories inherit
# them.
include_directories(
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx
)

# Directory-scoped preprocessor definitions: NDEBUG is always defined, and the
# GGML version/commit macros are pinned to the placeholder value 0x0.
add_compile_definitions(
    NDEBUG
    GGML_VERSION=0x0
    GGML_COMMIT=0x0
)
|
|
|
|
# Build the ggml sources with the CPU backend enabled. The aggregate "ggml"
# target is excluded from the default "all" build.
set(GGML_CPU ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
set_target_properties(ggml PROPERTIES EXCLUDE_FROM_ALL TRUE)

# MANUALLY_ADDED_DEPENDENCIES lists the targets attached to ggml-cpu through
# add_dependencies(). When that list is empty or not found, ggml-cpu itself is
# the only thing to install.
get_target_property(CPU_VARIANTS ggml-cpu MANUALLY_ADDED_DEPENDENCIES)
if(NOT CPU_VARIANTS)
    set(CPU_VARIANTS "ggml-cpu")
endif()

# Install the base library plus the CPU backend(s) under the CPU component.
# The catch-all PRE_EXCLUDE_REGEXES pattern filters out every discovered
# runtime dependency, so none is installed alongside the targets.
install(
    TARGETS ggml-base ${CPU_VARIANTS}
    RUNTIME_DEPENDENCIES
        PRE_EXCLUDE_REGEXES ".*"
    RUNTIME
        DESTINATION ${OLLAMA_INSTALL_DIR}
        COMPONENT CPU
    LIBRARY
        DESTINATION ${OLLAMA_INSTALL_DIR}
        COMPONENT CPU
    FRAMEWORK
        DESTINATION ${OLLAMA_INSTALL_DIR}
        COMPONENT CPU
)
|
|
|
|
# Optional CUDA backend: only configured when a CUDA compiler is available.
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
    # The "native" architecture value requires CMake 3.24 or newer; only apply
    # it when the user has not chosen architectures themselves.
    if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24" AND NOT CMAKE_CUDA_ARCHITECTURES)
        set(CMAKE_CUDA_ARCHITECTURES "native")
    endif()

    # REQUIRED: the CUDAToolkit_* variables and the ggml-cuda target are used
    # immediately below, so fail here with a clear message instead of carrying
    # on with empty search directories when the toolkit cannot be located.
    find_package(CUDAToolkit REQUIRED)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)

    # Install the backend together with the CUDA runtime libraries it depends
    # on. Everything is excluded by the catch-all pattern except names matched
    # by PRE_INCLUDE_REGEXES (cublas, cublasLt, cudart), which are searched for
    # in the toolkit's bin and library directories.
    install(TARGETS ggml-cuda
        RUNTIME_DEPENDENCIES
            DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR}
            PRE_INCLUDE_REGEXES cublas cublasLt cudart
            PRE_EXCLUDE_REGEXES ".*"
        RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA
        LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA
    )
endif()
|
|
|
|
# User-overridable cache entry: a regular expression naming AMDGPU_TARGETS
# entries that are not supported on Windows. The default is written once in a
# temporary variable so the value and its help text cannot drift apart.
set(windows_amdgpu_exclude_default "^gfx(906|908|90a|1200|1201):xnack[+-]$")
set(WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX "${windows_amdgpu_exclude_default}"
    CACHE STRING
    "Regular expression describing AMDGPU_TARGETS not supported on Windows. Override to force building these targets. Default \"${windows_amdgpu_exclude_default}\"."
)
unset(windows_amdgpu_exclude_default)
|
|
|
|
# Optional HIP (ROCm) backend: only configured when a HIP compiler is available.
check_language(HIP)
if(CMAKE_HIP_COMPILER)
    set(HIP_PLATFORM "amd")

    # No target list supplied by the user: load the hip package (presumably it
    # populates AMDGPU_TARGETS -- confirm against the hip CMake config) and keep
    # only the GPU architectures matched by the regex below.
    if(NOT AMDGPU_TARGETS)
        find_package(hip REQUIRED)
        list(FILTER AMDGPU_TARGETS INCLUDE REGEX "^gfx(900|94[012]|101[02]|1030|110[012]|120[01])$")
    endif()

    # On Windows, drop the targets matched by the user-overridable exclusion
    # regex. The expansion is quoted so that the value always reaches
    # list(FILTER) as a single argument, whatever the user put in it.
    if(WIN32 AND WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX)
        list(FILTER AMDGPU_TARGETS EXCLUDE REGEX "${WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX}")
    endif()

    # Only build the backend when at least one target survived the filtering.
    if(AMDGPU_TARGETS)
        find_package(hip REQUIRED)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)

        if(WIN32)
            target_compile_definitions(ggml-hip PRIVATE GGML_CUDA_NO_PEER_COPY)
        endif()

        target_compile_definitions(ggml-hip PRIVATE GGML_HIP_NO_VMM)

        # Install the backend and record its runtime dependencies in the
        # "rocm" dependency set, which is installed right below.
        install(TARGETS ggml-hip
            RUNTIME_DEPENDENCY_SET rocm
            RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
            LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
        )

        # Install only the listed ROCm libraries from that set: everything else
        # is excluded by the catch-all pattern, and anything that resolves to a
        # path containing "system32" is dropped after resolution.
        install(RUNTIME_DEPENDENCY_SET rocm
            DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}
            PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf
            PRE_EXCLUDE_REGEXES ".*"
            POST_EXCLUDE_REGEXES "system32"
            RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
            LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
        )

        # Install the "rocblas" directory from the first HIP bin/lib directory
        # that contains one. The directory variables stay unquoted in the
        # foreach so that an empty variable contributes no item; the paths built
        # from each item are quoted so each is passed as a single argument.
        foreach(HIP_LIB_BIN_INSTALL_DIR IN ITEMS ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR})
            if(EXISTS "${HIP_LIB_BIN_INSTALL_DIR}/rocblas")
                install(DIRECTORY "${HIP_LIB_BIN_INSTALL_DIR}/rocblas" DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP)
                break()
            endif()
        endforeach()
    endif()
endif()
|