llm: Enable flash attention by default for qwen3 and qwen3moe

This commit is contained in:
Jesse Gross
2025-10-02 16:51:51 -07:00
committed by Jesse Gross
parent 55ca827267
commit 0bda72892c

View File

@@ -899,6 +899,8 @@ func (f GGML) SupportsFlashAttention() bool {
// FlashAttention reports whether flash attention should be enabled by
// default for this model, based on the architecture recorded in the
// GGUF metadata ("general.architecture").
func (f GGML) FlashAttention() bool {
	switch f.KV().String("general.architecture") {
	case "gptoss", "gpt-oss", "qwen3", "qwen3moe":
		return true
	default:
		return false
	}
}