use ollama engine for bert models (#13501)

register bpe tokenizer which enables granite-embedding
This commit is contained in:
Michael Yang
2025-12-16 11:29:19 -08:00
committed by GitHub
parent 89eb795293
commit 903b1fc97f
2 changed files with 33 additions and 30 deletions

View File

@@ -241,19 +241,20 @@ func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
// OllamaEngineRequired reports whether the architecture returned by
// kv.Architecture() is one of the architectures that must be run on the
// Ollama engine.
//
// Keep the list sorted alphabetically, with closely related spellings or
// variants of the same family sharing a line, and list each name only once.
func (kv KV) OllamaEngineRequired() bool {
	return slices.Contains([]string{
		"bert",
		"deepseek2",
		"deepseekocr",
		"gemma3",
		"gemma3n",
		"gptoss", "gpt-oss",
		"llama4",
		"mistral3",
		"mllama",
		"nomic-bert",
		"olmo3",
		"qwen25vl",
		"qwen3", "qwen3moe",
		"qwen3vl", "qwen3vlmoe",
	}, kv.Architecture())
}
@@ -839,6 +840,7 @@ func (f GGML) SupportsFlashAttention() bool {
// FlashAttention checks if the model should enable flash attention
func (f GGML) FlashAttention() bool {
return slices.Contains([]string{
"bert",
"gemma3",
"gptoss", "gpt-oss",
"mistral3",