diff --git a/model/models/qwen3/embed.go b/model/models/qwen3/embed.go
new file mode 100644
index 00000000..9a77efea
--- /dev/null
+++ b/model/models/qwen3/embed.go
@@ -0,0 +1,73 @@
+package qwen3
+
+import (
+	"github.com/ollama/ollama/fs"
+	"github.com/ollama/ollama/kvcache"
+	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/ml/nn/pooling"
+	"github.com/ollama/ollama/model"
+	"github.com/ollama/ollama/model/input"
+)
+
+type embedModel struct {
+	model.Base
+	model.BytePairEncoding
+
+	*Model
+	poolingType pooling.Type
+}
+
+func (m *embedModel) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
+	hiddenStates, err := m.forward(ctx, batch)
+	if err != nil {
+		return nil, err
+	}
+
+	hiddenStates = m.poolingType.Forward(ctx, hiddenStates)
+	hiddenStates = hiddenStates.L2Norm(ctx, 1e-12)
+	return hiddenStates, nil
+}
+
+func newEmbed(c fs.Config) (model.Model, error) {
+	layers := make([]Layer, c.Uint("block_count"))
+	for i := range layers {
+		layers[i].MLP = &dense{}
+	}
+	m := embedModel{
+		BytePairEncoding: model.NewBytePairEncoding(
+			`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
+			&model.Vocabulary{
+				Values: c.Strings("tokenizer.ggml.tokens"),
+				Types:  c.Ints("tokenizer.ggml.token_type"),
+				Merges: c.Strings("tokenizer.ggml.merges"),
+				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
+				BOS:    []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
+				EOS: append(
+					[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
+					c.Ints("tokenizer.ggml.eos_token_ids")...,
+				),
+			},
+		),
+		Model: &Model{
+			Layers: layers,
+			Options: &Options{
+				hiddenSize:     int(c.Uint("embedding_length")),
+				numHeads:       int(c.Uint("attention.head_count")),
+				numKVHeads:     int(c.Uint("attention.head_count_kv")),
+				keyLength:      int(c.Uint("attention.key_length")),
+				valueLength:    int(c.Uint("attention.value_length")),
+				eps:            c.Float("attention.layer_norm_rms_epsilon"),
+				ropeBase:       c.Float("rope.freq_base"),
+				ropeScale:      c.Float("rope.freq_scale", 1),
+				numExperts:     int(c.Uint("expert_count")),
+				numExpertsUsed: int(c.Uint("expert_used_count")),
+				normTopKProb:   c.Bool("norm_top_k_prob", true),
+			},
+		},
+		poolingType: pooling.Type(c.Uint("pooling_type")),
+	}
+
+	m.Cache = kvcache.NewCausalCache(m.Shift)
+	return &m, nil
+}
diff --git a/model/models/qwen3/model.go b/model/models/qwen3/model.go
index c4e0b2d8..35226834 100644
--- a/model/models/qwen3/model.go
+++ b/model/models/qwen3/model.go
@@ -151,14 +151,25 @@ type Model struct {
 	*Options
 }
 
-// Forward implements model.Model.
 func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
+	hiddenStates, err := m.forward(ctx, batch)
+	if err != nil {
+		return nil, err
+	}
+
+	return m.Output.Forward(ctx, hiddenStates), nil
+}
+
+// Forward implements model.Model.
+func (m *Model) forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 	positions := ctx.Input().FromIntSlice(batch.Positions, len(batch.Positions))
 
 	hiddenStates := m.TokenEmbedding.Forward(ctx, batch.Inputs)
 
 	for i, layer := range m.Layers {
-		m.Cache.SetLayer(i)
+		if m.Cache != nil {
+			m.Cache.SetLayer(i)
+		}
 
 		var outputs ml.Tensor
 		if i == len(m.Layers)-1 {
@@ -168,8 +179,7 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 		hiddenStates = layer.Forward(ctx, hiddenStates, positions, outputs, m.Cache, m.Options)
 	}
 
-	hiddenStates = m.OutputNorm.Forward(ctx, hiddenStates, m.eps)
-	return m.Output.Forward(ctx, hiddenStates), nil
+	return m.OutputNorm.Forward(ctx, hiddenStates, m.eps), nil
 }
 
 func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
@@ -227,4 +237,5 @@ func New(c fs.Config) (model.Model, error) {
 func init() {
 	model.Register("qwen3", New)
 	model.Register("qwen3moe", New)
+	model.Register("qwen3_embed", newEmbed)
 }
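For reference, a minimal sketch of what the new embedding path in embed.go computes: the shared transformer produces per-token hidden states, and embedModel.Forward pools them into a single vector (strategy selected by the "pooling_type" key) and L2-normalizes the result. The sketch below uses plain float64 slices, a hypothetical meanPool helper, and an epsilon floor on the norm; it assumes mean pooling and does not use the real ml.Tensor or pooling package APIs, so treat it as an illustration of the math rather than the actual implementation.

package main

import (
	"fmt"
	"math"
)

// meanPool averages per-token hidden states into one vector, which is
// what mean pooling produces; the real code picks the strategy from the
// "pooling_type" config key and operates on ml.Tensor values instead.
func meanPool(hidden [][]float64) []float64 {
	if len(hidden) == 0 {
		return nil
	}
	out := make([]float64, len(hidden[0]))
	for _, tok := range hidden {
		for i, v := range tok {
			out[i] += v
		}
	}
	for i := range out {
		out[i] /= float64(len(hidden))
	}
	return out
}

// l2Normalize scales a vector to unit length, mirroring the
// hiddenStates.L2Norm(ctx, 1e-12) call in the diff; using eps as a
// floor on the norm is a simplification to avoid dividing by zero.
func l2Normalize(v []float64, eps float64) []float64 {
	var sum float64
	for _, x := range v {
		sum += x * x
	}
	norm := math.Sqrt(sum)
	if norm < eps {
		norm = eps
	}
	out := make([]float64, len(v))
	for i, x := range v {
		out[i] = x / norm
	}
	return out
}

func main() {
	// Two tokens, each with a 3-dimensional hidden state.
	hidden := [][]float64{{1, 2, 3}, {3, 2, 1}}
	fmt.Println(l2Normalize(meanPool(hidden), 1e-12)) // unit-length embedding
}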