diff --git a/model/models/llama/model.go b/model/models/llama/model.go
index 6e214f0f..9b85f1c7 100644
--- a/model/models/llama/model.go
+++ b/model/models/llama/model.go
@@ -1,9 +1,8 @@
 package llama
 
 import (
-	"fmt"
+	"cmp"
 	"math"
-	"strings"
 
 	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
@@ -14,9 +13,9 @@ import (
 )
 
 type Options struct {
-	hiddenSize, numHeads, numKVHeads int
-	eps, ropeBase, ropeScale         float32
-	ropeDim                          uint32
+	hiddenSize, numHeads, numKVHeads, headDim int
+	eps, ropeBase, ropeScale                  float32
+	ropeDim                                   uint32
 }
 
 type Model struct {
@@ -32,10 +31,6 @@ type Model struct {
 }
 
 func New(c fs.Config) (model.Model, error) {
-	if !strings.EqualFold(c.String("tokenizer.ggml.model"), "gpt2") {
-		return nil, fmt.Errorf("tokenizer %s not yet supported", c.String("tokenizer.ggml.model"))
-	}
-
 	m := Model{
 		BytePairEncoding: model.NewBytePairEncoding(
 			c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
@@ -57,6 +52,7 @@ func New(c fs.Config) (model.Model, error) {
 			hiddenSize: int(c.Uint("embedding_length")),
 			numHeads:   int(c.Uint("attention.head_count")),
 			numKVHeads: int(c.Uint("attention.head_count_kv")),
+			headDim:    int(c.Uint("attention.key_length")),
 			eps:        c.Float("attention.layer_norm_rms_epsilon"),
 			ropeBase:   c.Float("rope.freq_base"),
 			ropeScale:  c.Float("rope.freq_scale", 1),
@@ -79,7 +75,7 @@ type SelfAttention struct {
 
 func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
 	batchSize := hiddenState.Dim(1)
-	headDim := opts.hiddenSize / opts.numHeads
+	headDim := cmp.Or(opts.headDim, opts.hiddenSize/opts.numHeads)
 	ropeType := uint32(0)
 
 	q := sa.Query.Forward(ctx, hiddenState)
@@ -95,7 +91,7 @@ func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Ten
 
 	scaleFactor := 1.0 / math.Sqrt(float64(headDim))
 	kqv := nn.Attention(ctx, q, k, v, scaleFactor, cache)
-	kqv = kqv.Reshape(ctx, opts.hiddenSize, batchSize)
+	kqv = kqv.Reshape(ctx, headDim*opts.numHeads, batchSize)
 
 	return sa.Output.Forward(ctx, kqv)
 }
diff --git a/model/models/mistral3/model.go b/model/models/mistral3/model.go
index 0d384b94..dd01a587 100644
--- a/model/models/mistral3/model.go
+++ b/model/models/mistral3/model.go
@@ -31,11 +31,6 @@ var _ model.MultimodalProcessor = (*Model)(nil)
 var _ model.TextProcessor = (*Model)(nil)
 
 func New(c fs.Config) (model.Model, error) {
-	textModel, err := NewTextModel(c)
-	if err != nil {
-		return nil, err
-	}
-
 	m := &Model{
 		BytePairEncoding: model.NewBytePairEncoding(
 			c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
@@ -52,7 +47,7 @@ func New(c fs.Config) (model.Model, error) {
 				),
 			},
 		),
-		TextModel:           textModel,
+		TextModel:           newTextModel(c),
 		VisionModel:         newVisionModel(c),
 		ImageProcessor:      newImageProcessor(c),
 		MultiModalProjector: newMultiModalProjector(c),
diff --git a/model/models/mistral3/model_text.go b/model/models/mistral3/model_text.go
index 17939800..57e2a40a 100644
--- a/model/models/mistral3/model_text.go
+++ b/model/models/mistral3/model_text.go
@@ -1,9 +1,8 @@
 package mistral3
 
 import (
-	"fmt"
+	"cmp"
 	"math"
-	"strings"
 
 	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
@@ -37,10 +36,7 @@ type SelfAttention struct {
 func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
 	batchSize := hiddenState.Dim(1)
 	ropeType := uint32(0)
-	headDim := opts.headDim
-	if headDim == 0 {
-		headDim = opts.hiddenSize / opts.numHeads
-	}
+	headDim := cmp.Or(opts.headDim, opts.hiddenSize/opts.numHeads)
 
 	q := sa.Query.Forward(ctx, hiddenState)
 	q = q.Reshape(ctx, headDim, opts.numHeads, batchSize)
@@ -125,12 +121,8 @@ func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor
 	return m.Output.Forward(ctx, hiddenState)
 }
 
-func NewTextModel(c fs.Config) (*TextModel, error) {
-	if !strings.EqualFold(c.String("tokenizer.ggml.model"), "gpt2") {
-		return nil, fmt.Errorf("tokenizer %s not yet supported", c.String("tokenizer.ggml.model"))
-	}
-
-	textModel := &TextModel{
+func newTextModel(c fs.Config) *TextModel {
+	return &TextModel{
 		Layers: make([]Layer, c.Uint("block_count")),
 		TextOptions: &TextOptions{
 			hiddenSize: int(c.Uint("embedding_length")),
@@ -143,6 +135,4 @@ func NewTextModel(c fs.Config) (*TextModel, error) {
 			ropeDim:    c.Uint("rope.dimension_count"),
 		},
 	}
-
-	return textModel, nil
 }
diff --git a/model/models/mistral3/model_vision.go b/model/models/mistral3/model_vision.go
index 469dc40c..24541004 100644
--- a/model/models/mistral3/model_vision.go
+++ b/model/models/mistral3/model_vision.go
@@ -170,7 +170,7 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor) ml.Tensor {
 
 func newVisionModel(c fs.Config) *VisionModel {
 	return &VisionModel{
-		Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count", 24)),
+		Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count")),
 		VisionModelOptions: &VisionModelOptions{
 			hiddenSize: int(c.Uint("vision.embedding_length", 1024)),
 			numHeads:   int(c.Uint("vision.attention.head_count", 16)),
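Note (not part of the patch): both Forward implementations above now rely on cmp.Or from the Go 1.22+ standard library, which returns the first of its arguments that is not the zero value of the type. A minimal standalone sketch of the fallback behavior being relied on, with illustrative numbers rather than values from any real model config:

	package main

	import (
		"cmp"
		"fmt"
	)

	func main() {
		hiddenSize, numHeads := 4096, 32

		// headDim is zero when the config omits attention.key_length,
		// so cmp.Or falls back to the computed hiddenSize/numHeads.
		headDim := 0
		fmt.Println(cmp.Or(headDim, hiddenSize/numHeads)) // 128

		// A non-zero headDim from the config wins over the fallback.
		headDim = 160
		fmt.Println(cmp.Or(headDim, hiddenSize/numHeads)) // 160
	}

Unlike the if-block it replaces, cmp.Or is not lazy: hiddenSize/numHeads is evaluated either way, which is harmless here since the division is cheap and numHeads is non-zero for any valid model.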