mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 14:26:30 +00:00
Prior to this change, our official binaries contained both JIT PTX code and the cubin binary code for our chosen compute capabilities. This change switches to compiling only the PTX code and relying on JIT compilation at runtime to generate the cubin specific to the user's GPU. The cubins are cached on the user's system, so users should only see a small lag on the very first model load for a given Ollama release. This also adds the first generation of Blackwell GPUs so they aren't reliant on the Hopper PTX. This change reduces the size of ggml-cuda.dll from 1.2G to 460M.
102 lines
2.3 KiB
JSON
102 lines
2.3 KiB
JSON
{
  "version": 3,
  "configurePresets": [
    {
      "name": "Default",
      "binaryDir": "${sourceDir}/build",
      "installDir": "${sourceDir}/dist",
      "cacheVariables": {
        "CMAKE_BUILD_TYPE": "Release",
        "CMAKE_MSVC_RUNTIME_LIBRARY": "MultiThreaded"
      }
    },
    {
      "name": "CPU",
      "inherits": [ "Default" ]
    },
    {
      "name": "CUDA",
      "inherits": [ "Default" ]
    },
    {
      "name": "CUDA 12",
      "inherits": [ "CUDA" ],
      "cacheVariables": {
        "CMAKE_CUDA_ARCHITECTURES": "50-virtual;60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;90a-virtual;100-virtual;120-virtual",
        "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets -t 2"
      }
    },
    {
      "name": "JetPack 5",
      "inherits": [ "CUDA" ],
      "cacheVariables": {
        "CMAKE_CUDA_ARCHITECTURES": "72-virtual;87-virtual"
      }
    },
    {
      "name": "JetPack 6",
      "inherits": [ "CUDA" ],
      "cacheVariables": {
        "CMAKE_CUDA_ARCHITECTURES": "87-virtual"
      }
    },
    {
      "name": "ROCm",
      "inherits": [ "Default" ],
      "cacheVariables": {
        "CMAKE_HIP_PLATFORM": "amd"
      }
    },
    {
      "name": "ROCm 6",
      "inherits": [ "ROCm" ],
      "cacheVariables": {
        "CMAKE_HIP_FLAGS": "-parallel-jobs=4",
        "AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-"
      }
    }
  ],
  "buildPresets": [
    {
      "name": "Default",
      "configurePreset": "Default",
      "configuration": "Release"
    },
    {
      "name": "CPU",
      "configurePreset": "Default",
      "targets": [ "ggml-cpu" ]
    },
    {
      "name": "CUDA",
      "configurePreset": "CUDA",
      "targets": [ "ggml-cuda" ]
    },
    {
      "name": "CUDA 12",
      "inherits": [ "CUDA" ],
      "configurePreset": "CUDA 12"
    },
    {
      "name": "JetPack 5",
      "inherits": [ "CUDA" ],
      "configurePreset": "JetPack 5"
    },
    {
      "name": "JetPack 6",
      "inherits": [ "CUDA" ],
      "configurePreset": "JetPack 6"
    },
    {
      "name": "ROCm",
      "configurePreset": "ROCm",
      "targets": [ "ggml-hip" ]
    },
    {
      "name": "ROCm 6",
      "inherits": [ "ROCm" ],
      "configurePreset": "ROCm 6"
    }
  ]
}