Mirror of https://github.com/likelovewant/ollama-for-amd.git, synced 2025-12-21 22:33:56 +00:00
vulkan: enable flash attention (#12937)
Also adjusts the Vulkan Windows build pattern to match recent changes in the other backends, so that incremental builds are faster.
@@ -432,7 +432,8 @@ func FlashAttentionSupported(l []DeviceInfo) bool {
 		supportsFA := gpu.Library == "cpu" ||
 			gpu.Name == "Metal" || gpu.Library == "Metal" ||
 			(gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) ||
-			gpu.Library == "ROCm"
+			gpu.Library == "ROCm" ||
+			gpu.Library == "Vulkan"

 		if !supportsFA {
 			return false
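For context, a minimal sketch of how the updated check might read in full. Only the supportsFA expression and the early return are shown in the diff above; the loop over the device list and the final return true are assumptions inferred from the function signature in the hunk header, and the field names (Library, Name, DriverMajor, ComputeMajor, ComputeMinor) are taken from the diff itself.

// Sketch only, not the verbatim upstream function: the surrounding loop
// and the trailing return are assumptions; the diff shows just the
// supportsFA expression and the early return.
func FlashAttentionSupported(l []DeviceInfo) bool {
	for _, gpu := range l { // assumed: iterate over the detected devices
		supportsFA := gpu.Library == "cpu" ||
			gpu.Name == "Metal" || gpu.Library == "Metal" ||
			(gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) ||
			gpu.Library == "ROCm" ||
			gpu.Library == "Vulkan" // new in this commit: Vulkan devices report FA support

		if !supportsFA {
			return false
		}
	}
	return true // assumed: every device passed the check
}

If the assumed loop structure is accurate, the check is conservative: one unsupported device in a mixed setup disables flash attention for the whole run.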