From 0bda72892cc8b42a649e950f004b100020bcd1aa Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@ollama.com>
Date: Thu, 2 Oct 2025 16:51:51 -0700
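Subject: [PATCH] llm: Enable flash attention by default for qwen3 and qwen3moe

Add "qwen3" and "qwen3moe" to the architecture list checked by
GGML.FlashAttention, so that it returns true when a model's
"general.architecture" is either value, as it already does for
"gptoss" and "gpt-oss".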
---
 fs/ggml/ggml.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index 58803f58..d34a6913 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -899,6 +899,8 @@ func (f GGML) SupportsFlashAttention() bool {
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
 		"gptoss", "gpt-oss",
+		"qwen3",
+		"qwen3moe",
 	}, f.KV().String("general.architecture"))
 }