Mirror of https://github.com/likelovewant/ollama-for-amd.git, synced 2025-12-21 14:26:30 +00:00
llm: Enable flash attention by default for qwen3 and qwen3moe
@@ -899,6 +899,8 @@ func (f GGML) SupportsFlashAttention() bool {
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
 		"gptoss", "gpt-oss",
+		"qwen3",
+		"qwen3moe",
 	}, f.KV().String("general.architecture"))
 }
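To see the change in isolation, here is a minimal standalone sketch of the same allowlist pattern the diff extends. The function name flashAttentionDefault and its arch parameter are hypothetical; in the actual method, the architecture string is read from the model's GGUF metadata via f.KV().String("general.architecture").

package main

import (
	"fmt"
	"slices"
)

// flashAttentionDefault reports whether flash attention should be
// enabled by default for the given model architecture, using the
// same allowlist as the patched FlashAttention method.
// (Hypothetical standalone version; arch stands in for the value of
// the "general.architecture" GGUF key.)
func flashAttentionDefault(arch string) bool {
	return slices.Contains([]string{
		"gptoss", "gpt-oss",
		"qwen3",
		"qwen3moe",
	}, arch)
}

func main() {
	// qwen3 and qwen3moe are newly enabled by this commit;
	// llama is not on the allowlist and stays off by default.
	for _, arch := range []string{"qwen3", "qwen3moe", "llama"} {
		fmt.Printf("%-9s flash attention by default: %v\n", arch, flashAttentionDefault(arch))
	}
}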
|||||||