use ollama engine for bert models (#13501)

register bpe tokenizer which enables granite-embedding
This commit is contained in:
Michael Yang
2025-12-16 11:29:19 -08:00
committed by GitHub
parent 89eb795293
commit 903b1fc97f
2 changed files with 33 additions and 30 deletions

View File

@@ -241,19 +241,20 @@ func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
func (kv KV) OllamaEngineRequired() bool { func (kv KV) OllamaEngineRequired() bool {
return slices.Contains([]string{ return slices.Contains([]string{
"bert",
"deepseek2",
"deepseekocr",
"gemma3", "gemma3",
"gemma3n", "gemma3n",
"gptoss", "gpt-oss", "gptoss", "gpt-oss",
"llama4", "llama4",
"mistral3", "mistral3",
"mllama", "mllama",
"nomic-bert",
"olmo3",
"qwen25vl", "qwen25vl",
"qwen3", "qwen3moe", "qwen3", "qwen3moe",
"qwen3vl", "qwen3vlmoe", "qwen3vl", "qwen3vlmoe",
"deepseekocr",
"deepseek2",
"nomic-bert",
"olmo3",
}, kv.Architecture()) }, kv.Architecture())
} }
@@ -839,6 +840,7 @@ func (f GGML) SupportsFlashAttention() bool {
// FlashAttention checks if the model should enable flash attention // FlashAttention checks if the model should enable flash attention
func (f GGML) FlashAttention() bool { func (f GGML) FlashAttention() bool {
return slices.Contains([]string{ return slices.Contains([]string{
"bert",
"gemma3", "gemma3",
"gptoss", "gpt-oss", "gptoss", "gpt-oss",
"mistral3", "mistral3",

View File

@@ -129,11 +129,7 @@ func (o Options) headDim() int {
} }
func New(c fs.Config) (model.Model, error) { func New(c fs.Config) (model.Model, error) {
var processor model.TextProcessor vocab := &model.Vocabulary{
switch c.String("tokenizer.ggml.model", "bert") {
case "bert":
processor = model.NewWordPiece(
&model.Vocabulary{
Values: c.Strings("tokenizer.ggml.tokens"), Values: c.Strings("tokenizer.ggml.tokens"),
Scores: c.Floats("tokenizer.ggml.scores"), Scores: c.Floats("tokenizer.ggml.scores"),
Types: c.Ints("tokenizer.ggml.token_type"), Types: c.Ints("tokenizer.ggml.token_type"),
@@ -155,9 +151,14 @@ func New(c fs.Config) (model.Model, error) {
c.Uint("tokenizer.ggml.eos_token_id"), c.Uint("tokenizer.ggml.eos_token_id"),
)), )),
}, },
}, }
true,
) var processor model.TextProcessor
switch c.String("tokenizer.ggml.model", "bert") {
case "bert":
processor = model.NewWordPiece(vocab, true)
case "gpt2":
processor = model.NewBytePairEncoding(vocab)
default: default:
return nil, model.ErrUnsupportedTokenizer return nil, model.ErrUnsupportedTokenizer
} }