Mirror of https://github.com/likelovewant/ollama-for-amd.git (synced 2025-12-21 14:26:30 +00:00)
discover: Disable flash attention for Jetson Xavier (CC 7.2)
GGML picks the wrong kernel and these systems fail with:

    Sep 28 22:25:39 xavier ollama[48999]: //ml/backend/ggml/ggml/src/ggml-cuda/fattn-wmma-f16.cu:437: ERROR: CUDA kernel flash_attn_ext_f16 has no device code compatible with CUDA arch 720. ggml-cuda.cu was compiled for: __CUDA_ARCH_LIST__

Fixes #12442
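For context, GGML encodes a CUDA compute capability as major*100 + minor*10, which is why Jetson Xavier's CC 7.2 surfaces as "arch 720" in the log above. A minimal sketch of that mapping (illustrative only, not code from this repo):

	package main

	import "fmt"

	func main() {
		// GGML-style arch number: major*100 + minor*10,
		// so CC 7.2 (Jetson Xavier) appears as 720.
		major, minor := 7, 2
		fmt.Println(major*100 + minor*10) // 720
	}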
@@ -2,7 +2,6 @@ package discover
 
 import (
 	"context"
-	"fmt"
 	"log/slog"
 	"os"
 	"path/filepath"
@@ -62,17 +61,14 @@ func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
 			DependencyPath: dev.LibraryPath,
 			DriverMajor:    dev.DriverMajor,
 			DriverMinor:    dev.DriverMinor,
+			ComputeMajor:   dev.ComputeMajor,
+			ComputeMinor:   dev.ComputeMinor,
 		}
 		if dev.Library == "CUDA" || dev.Library == "ROCm" {
 			info.MinimumMemory = 457 * format.MebiByte
 		}
-		if dev.Library == "ROCm" {
-			info.Compute = fmt.Sprintf("gfx%x%02x", dev.ComputeMajor, dev.ComputeMinor)
-			if rocmDir != "" {
-				info.DependencyPath = append(info.DependencyPath, rocmDir)
-			}
-		} else {
-			info.Compute = fmt.Sprintf("%d.%d", dev.ComputeMajor, dev.ComputeMinor)
+		if dev.Library == "ROCm" && rocmDir != "" {
+			info.DependencyPath = append(info.DependencyPath, rocmDir)
 		}
 		resp = append(resp, info)
 	}
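The dropped fmt.Sprintf calls are what previously rendered the numeric major/minor pair into the single Compute string (removed from the struct in the next hunk). A standalone illustration of both formats, using example values we picked rather than values from the repo:

	package main

	import "fmt"

	func main() {
		// ROCm form: "gfx%x%02x" renders major and minor in hex;
		// with example values major=9, minor=0x0a this yields "gfx90a".
		fmt.Printf("gfx%x%02x\n", 9, 0x0a) // gfx90a

		// CUDA form: plain "major.minor"; Jetson Xavier reports 7.2.
		fmt.Printf("%d.%d\n", 7, 2) // 7.2
	}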
@@ -37,9 +37,10 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
 	UnreliableFreeMemory bool
 
 	// GPU information
 	filterID string // AMD Workaround: The numeric ID of the device used to filter out other devices
 	Name     string `json:"name"`    // user friendly name if available
-	Compute  string `json:"compute"` // Compute Capability or gfx
+	ComputeMajor int `json:"compute_major"` // Compute Capability or gfx
+	ComputeMinor int `json:"compute_minor"`
 
 	// Driver Information - TODO no need to put this on each GPU
 	DriverMajor int `json:"driver_major,omitempty"`
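Since the struct now carries the capability as two JSON-tagged integers rather than one string, a discovery entry for a Xavier-class device would serialize roughly as below. The gpuInfo type here is a trimmed stand-in for discover.GpuInfo, keeping only the fields this change touches:

	package main

	import (
		"encoding/json"
		"fmt"
	)

	// gpuInfo is a local stand-in for discover.GpuInfo,
	// reproducing only the renamed capability fields.
	type gpuInfo struct {
		Name         string `json:"name"`
		ComputeMajor int    `json:"compute_major"`
		ComputeMinor int    `json:"compute_minor"`
	}

	func main() {
		b, _ := json.Marshal(gpuInfo{Name: "Xavier", ComputeMajor: 7, ComputeMinor: 2})
		fmt.Println(string(b)) // {"name":"Xavier","compute_major":7,"compute_minor":2}
	}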
@@ -173,7 +174,7 @@ func (l GpuInfoList) FlashAttentionSupported() bool {
 	for _, gpu := range l {
 		supportsFA := gpu.Library == "cpu" ||
 			gpu.Name == "Metal" || gpu.Library == "Metal" ||
-			(gpu.Library == "CUDA" && gpu.DriverMajor >= 7) ||
+			(gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) || // We don't have kernels for Jetson Xavier
 			gpu.Library == "ROCm"
 
 		if !supportsFA {
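To make the new gate concrete, here is the CUDA clause restated as a standalone function; cudaSupportsFA and the local gpuInfo stub are ours, and only the boolean logic mirrors the diff:

	package main

	import "fmt"

	// Local stub with just the fields the check reads.
	type gpuInfo struct {
		Library      string
		DriverMajor  int
		ComputeMajor int
		ComputeMinor int
	}

	// cudaSupportsFA mirrors the CUDA clause added above: allowed when the
	// driver major version is at least 7, except Jetson Xavier (CC 7.2),
	// for which no flash-attention kernels are built.
	func cudaSupportsFA(gpu gpuInfo) bool {
		return gpu.Library == "CUDA" && gpu.DriverMajor >= 7 &&
			!(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)
	}

	func main() {
		xavier := gpuInfo{Library: "CUDA", DriverMajor: 12, ComputeMajor: 7, ComputeMinor: 2}
		orin := gpuInfo{Library: "CUDA", DriverMajor: 12, ComputeMajor: 8, ComputeMinor: 7}
		fmt.Println(cudaSupportsFA(xavier)) // false: flash attention stays disabled
		fmt.Println(cudaSupportsFA(orin))   // true: Jetson Orin (CC 8.7) is unaffected
	}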