diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go index afb90720..fb993a28 100644 --- a/fs/ggml/ggml.go +++ b/fs/ggml/ggml.go @@ -761,6 +761,10 @@ func (f GGML) SupportsFlashAttention() bool { return false } + if f.KV().Architecture() == "gptoss" { + return false + } + // Check head counts match and are non-zero headCountK := f.KV().EmbeddingHeadCountK() headCountV := f.KV().EmbeddingHeadCountV()