From 0bda72892cc8b42a649e950f004b100020bcd1aa Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Thu, 2 Oct 2025 16:51:51 -0700
Subject: [PATCH] llm: Enable flash attention by default for qwen3 and qwen3moe

---
 fs/ggml/ggml.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index 58803f58..d34a6913 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -899,6 +899,8 @@ func (f GGML) SupportsFlashAttention() bool {
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
 		"gptoss", "gpt-oss",
+		"qwen3",
+		"qwen3moe",
 	}, f.KV().String("general.architecture"))
 }
 