From a4770107a6ea6b4f5adc235d37d08417dc3b9184 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 4 Nov 2025 10:31:22 -0800 Subject: [PATCH] vulkan: enable flash attention (#12937) Also adjusts the Vulkan Windows build pattern to match recent changes in other backends so incremental builds are faster. --- ml/device.go | 3 ++- scripts/build_windows.ps1 | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ml/device.go b/ml/device.go index 1fbe365e..70e0c6a3 100644 --- a/ml/device.go +++ b/ml/device.go @@ -432,7 +432,8 @@ func FlashAttentionSupported(l []DeviceInfo) bool { supportsFA := gpu.Library == "cpu" || gpu.Name == "Metal" || gpu.Library == "Metal" || (gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) || - gpu.Library == "ROCm" + gpu.Library == "ROCm" || + gpu.Library == "Vulkan" if !supportsFA { return false diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index 548545cb..3c885b98 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -187,11 +187,11 @@ function buildROCm() { function buildVulkan(){ if ($env:VULKAN_SDK) { write-host "Building Vulkan backend libraries" - & cmake --fresh --preset Vulkan --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="vulkan" + & cmake -B build\vulkan --preset Vulkan --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="vulkan" if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} - & cmake --build --preset Vulkan --config Release --parallel $script:JOBS + & cmake --build build\vulkan --target ggml-vulkan --config Release --parallel $script:JOBS if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} - & cmake --install build --component Vulkan --strip + & cmake --install build\vulkan --component Vulkan --strip if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} } }