discover: Disable flash attention for Jetson Xavier (CC 7.2)

GGML picks the wrong kernel and these systems fail with:

Sep 28 22:25:39 xavier ollama[48999]: //ml/backend/ggml/ggml/src/ggml-cuda/fattn-wmma-f16.cu:437: ERROR: CUDA kernel flash_attn_ext_f16 has no device code compatible with CUDA arch 720. ggml-cuda.cu was compiled for: __CUDA_ARCH_LIST__

Fixes #12442
2025-12-21 22:33:56 +00:00 · 2025-10-07 11:37:58 -07:00
parent 4e5d862ec4
commit aa45f7ce27
3 changed files with 17 additions and 13 deletions
--- a/discover/gpu.go
+++ b/discover/gpu.go
@@ -2,7 +2,6 @@ package discover

 import (
 	"context"
-	"fmt"
 	"log/slog"
 	"os"
 	"path/filepath"
@@ -62,17 +61,14 @@ func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
 			DependencyPath: dev.LibraryPath,
 			DriverMajor:    dev.DriverMajor,
 			DriverMinor:    dev.DriverMinor,
+			ComputeMajor:   dev.ComputeMajor,
+			ComputeMinor:   dev.ComputeMinor,
 		}
 		if dev.Library == "CUDA" || dev.Library == "ROCm" {
 			info.MinimumMemory = 457 * format.MebiByte
 		}
-		if dev.Library == "ROCm" {
-			info.Compute = fmt.Sprintf("gfx%x%02x", dev.ComputeMajor, dev.ComputeMinor)
-			if rocmDir != "" {
-				info.DependencyPath = append(info.DependencyPath, rocmDir)
-			}
-		} else {
-			info.Compute = fmt.Sprintf("%d.%d", dev.ComputeMajor, dev.ComputeMinor)
+		if dev.Library == "ROCm" && rocmDir != "" {
+			info.DependencyPath = append(info.DependencyPath, rocmDir)
 		}
 		resp = append(resp, info)
 	}