vulkan: enable flash attention (#12937)

Also adjusts the Vulkan Windows build pattern to match recent changes in other backends so incremental builds are faster.
2025-12-21 14:26:30 +00:00 · 2025-11-04 10:31:22 -08:00
parent ef549d513c
commit a4770107a6
2 changed files with 5 additions and 4 deletions
--- a/ml/device.go
+++ b/ml/device.go
@@ -432,7 +432,8 @@ func FlashAttentionSupported(l []DeviceInfo) bool {
 		supportsFA := gpu.Library == "cpu" ||
 			gpu.Name == "Metal" || gpu.Library == "Metal" ||
 			(gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) ||
-			gpu.Library == "ROCm"
+			gpu.Library == "ROCm" ||
+			gpu.Library == "Vulkan"

 		if !supportsFA {
 			return false
--- a/scripts/build_windows.ps1
+++ b/scripts/build_windows.ps1
@@ -187,11 +187,11 @@ function buildROCm() {
 function buildVulkan(){
    if ($env:VULKAN_SDK) {
        write-host "Building Vulkan backend libraries"
-        & cmake --fresh --preset Vulkan --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="vulkan"
+        & cmake -B build\vulkan --preset Vulkan --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="vulkan"
        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-        & cmake --build --preset Vulkan  --config Release --parallel $script:JOBS
+        & cmake --build build\vulkan --target ggml-vulkan  --config Release --parallel $script:JOBS
        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-        & cmake --install build --component Vulkan --strip
+        & cmake --install build\vulkan --component Vulkan --strip
        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
    }
 }