vulkan: enable flash attention (#12937)

Also adjusts the Vulkan Windows build pattern to match recent changes in other backends
so incremental builds are faster.
This commit is contained in:
Daniel Hiltgen
2025-11-04 10:31:22 -08:00
committed by GitHub
parent ef549d513c
commit a4770107a6
2 changed files with 5 additions and 4 deletions

View File

@@ -432,7 +432,8 @@ func FlashAttentionSupported(l []DeviceInfo) bool {
supportsFA := gpu.Library == "cpu" || supportsFA := gpu.Library == "cpu" ||
gpu.Name == "Metal" || gpu.Library == "Metal" || gpu.Name == "Metal" || gpu.Library == "Metal" ||
(gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) || (gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) ||
gpu.Library == "ROCm" gpu.Library == "ROCm" ||
gpu.Library == "Vulkan"
if !supportsFA { if !supportsFA {
return false return false

View File

@@ -187,11 +187,11 @@ function buildROCm() {
function buildVulkan(){ function buildVulkan(){
if ($env:VULKAN_SDK) { if ($env:VULKAN_SDK) {
write-host "Building Vulkan backend libraries" write-host "Building Vulkan backend libraries"
& cmake --fresh --preset Vulkan --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="vulkan" & cmake -B build\vulkan --preset Vulkan --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="vulkan"
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --build --preset Vulkan --config Release --parallel $script:JOBS & cmake --build build\vulkan --target ggml-vulkan --config Release --parallel $script:JOBS
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --install build --component Vulkan --strip & cmake --install build\vulkan --component Vulkan --strip
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
} }
} }