discover: Disable flash attention for Jetson Xavier (CC 7.2)

GGML picks the wrong kernel and these systems fail with:
Sep 28 22:25:39 xavier ollama[48999]: //ml/backend/ggml/ggml/src/ggml-cuda/fattn-wmma-f16.cu:437:
ERROR: CUDA kernel flash_attn_ext_f16 has no device code compatible with CUDA arch 720. ggml-cuda.cu
was compiled for: __CUDA_ARCH_LIST__

Fixes #12442
This commit is contained in:
Jesse Gross
2025-10-07 11:37:58 -07:00
committed by Jesse Gross
parent 4e5d862ec4
commit aa45f7ce27
3 changed files with 17 additions and 13 deletions

View File

@@ -2,7 +2,6 @@ package discover
import (
"context"
"fmt"
"log/slog"
"os"
"path/filepath"
@@ -62,17 +61,14 @@ func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
DependencyPath: dev.LibraryPath,
DriverMajor: dev.DriverMajor,
DriverMinor: dev.DriverMinor,
ComputeMajor: dev.ComputeMajor,
ComputeMinor: dev.ComputeMinor,
}
if dev.Library == "CUDA" || dev.Library == "ROCm" {
info.MinimumMemory = 457 * format.MebiByte
}
if dev.Library == "ROCm" {
info.Compute = fmt.Sprintf("gfx%x%02x", dev.ComputeMajor, dev.ComputeMinor)
if rocmDir != "" {
info.DependencyPath = append(info.DependencyPath, rocmDir)
}
} else {
info.Compute = fmt.Sprintf("%d.%d", dev.ComputeMajor, dev.ComputeMinor)
if dev.Library == "ROCm" && rocmDir != "" {
info.DependencyPath = append(info.DependencyPath, rocmDir)
}
resp = append(resp, info)
}