Mirror of https://github.com/likelovewant/ollama-for-amd.git, synced 2025-12-21 14:26:30 +00:00
llm: Enable flash attention by default for qwen3 and qwen3moe
@@ -899,6 +899,8 @@ func (f GGML) SupportsFlashAttention() bool {
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
 		"gptoss", "gpt-oss",
+		"qwen3",
+		"qwen3moe",
 	}, f.KV().String("general.architecture"))
 }
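To see the change in isolation, here is a minimal standalone sketch of the same allowlist pattern the diff extends. The function name flashAttentionDefault and its arch parameter are hypothetical; in the actual method, the architecture string is read from the model's GGUF metadata via f.KV().String("general.architecture").

package main

import (
	"fmt"
	"slices"
)

// flashAttentionDefault reports whether flash attention should be
// enabled by default for the given model architecture, using the
// same allowlist as the patched FlashAttention method.
// (Hypothetical standalone version; arch stands in for the value of
// the "general.architecture" GGUF key.)
func flashAttentionDefault(arch string) bool {
	return slices.Contains([]string{
		"gptoss", "gpt-oss",
		"qwen3",
		"qwen3moe",
	}, arch)
}

func main() {
	// qwen3 and qwen3moe are newly enabled by this commit;
	// llama is not on the allowlist and stays off by default.
	for _, arch := range []string{"qwen3", "qwen3moe", "llama"} {
		fmt.Printf("%-9s flash attention by default: %v\n", arch, flashAttentionDefault(arch))
	}
}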
|||||||