diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go index 57476a9a..6b582b49 100644 --- a/fs/ggml/ggml.go +++ b/fs/ggml/ggml.go @@ -883,6 +883,10 @@ func (f GGML) SupportsFlashAttention() bool { return false } + if arch := f.KV().Architecture(); slices.Contains([]string{"gemma2"}, arch) { + return false + } + // Check head counts match and are non-zero headCountK := f.KV().EmbeddingHeadCountK() headCountV := f.KV().EmbeddingHeadCountV()