Mirror of https://github.com/likelovewant/ollama-for-amd.git (synced 2025-12-21 22:33:56 +00:00)
fix llama and mistral3 models (#10774)
* fix llama model
* fix mistral3.1 model: do not set default vision layers
package llama:

@@ -1,9 +1,8 @@
 package llama
 
 import (
-	"fmt"
+	"cmp"
 	"math"
-	"strings"
 
 	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
@@ -14,7 +13,7 @@ import (
 )
 
 type Options struct {
-	hiddenSize, numHeads, numKVHeads int
+	hiddenSize, numHeads, numKVHeads, headDim int
 	eps, ropeBase, ropeScale float32
 	ropeDim uint32
 }
@@ -32,10 +31,6 @@ type Model struct {
 }
 
 func New(c fs.Config) (model.Model, error) {
-	if !strings.EqualFold(c.String("tokenizer.ggml.model"), "gpt2") {
-		return nil, fmt.Errorf("tokenizer %s not yet supported", c.String("tokenizer.ggml.model"))
-	}
-
 	m := Model{
 		BytePairEncoding: model.NewBytePairEncoding(
 			c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
@@ -57,6 +52,7 @@ func New(c fs.Config) (model.Model, error) {
 			hiddenSize: int(c.Uint("embedding_length")),
 			numHeads: int(c.Uint("attention.head_count")),
 			numKVHeads: int(c.Uint("attention.head_count_kv")),
+			headDim: int(c.Uint("attention.key_length")),
 			eps: c.Float("attention.layer_norm_rms_epsilon"),
 			ropeBase: c.Float("rope.freq_base"),
 			ropeScale: c.Float("rope.freq_scale", 1),
@@ -79,7 +75,7 @@ type SelfAttention struct {
 
 func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
 	batchSize := hiddenState.Dim(1)
-	headDim := opts.hiddenSize / opts.numHeads
+	headDim := cmp.Or(opts.headDim, opts.hiddenSize/opts.numHeads)
 	ropeType := uint32(0)
 
 	q := sa.Query.Forward(ctx, hiddenState)
@@ -95,7 +91,7 @@ func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Ten
 
 	scaleFactor := 1.0 / math.Sqrt(float64(headDim))
 	kqv := nn.Attention(ctx, q, k, v, scaleFactor, cache)
-	kqv = kqv.Reshape(ctx, opts.hiddenSize, batchSize)
+	kqv = kqv.Reshape(ctx, headDim*opts.numHeads, batchSize)
 
 	return sa.Output.Forward(ctx, kqv)
 }
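The llama change has two halves. First, headDim is now read from attention.key_length and stored in Options; cmp.Or (Go 1.22+) returns its first non-zero argument, so a zero headDim falls through to the old hiddenSize/numHeads computation. Second, the attention output is reshaped to headDim*opts.numHeads instead of opts.hiddenSize, which matters exactly when the two differ. A minimal runnable sketch of both points; the options type and the numbers here are illustrative, not taken from any specific model:

package main

import (
	"cmp"
	"fmt"
)

// options mirrors the relevant fields of the patched llama Options
// struct; the type and values are illustrative.
type options struct {
	hiddenSize, numHeads, headDim int
}

func main() {
	// Common case: attention.key_length is absent and reads as 0,
	// so cmp.Or falls through to hiddenSize/numHeads.
	a := options{hiddenSize: 4096, numHeads: 32, headDim: 0}
	fmt.Println(cmp.Or(a.headDim, a.hiddenSize/a.numHeads)) // 128

	// Decoupled case: key_length is present and differs from
	// hiddenSize/numHeads, so the explicit value wins.
	b := options{hiddenSize: 3072, numHeads: 24, headDim: 256}
	headDim := cmp.Or(b.headDim, b.hiddenSize/b.numHeads)
	fmt.Println(headDim) // 256

	// This is also why the attention output must be reshaped to
	// headDim*numHeads rather than hiddenSize: 256*24 = 6144,
	// not 3072.
	fmt.Println(headDim * b.numHeads) // 6144
}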
package mistral3 (model wiring):

@@ -31,11 +31,6 @@ var _ model.MultimodalProcessor = (*Model)(nil)
 var _ model.TextProcessor = (*Model)(nil)
 
 func New(c fs.Config) (model.Model, error) {
-	textModel, err := NewTextModel(c)
-	if err != nil {
-		return nil, err
-	}
-
 	m := &Model{
 		BytePairEncoding: model.NewBytePairEncoding(
 			c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
@@ -52,7 +47,7 @@ func New(c fs.Config) (model.Model, error) {
 				),
 			},
 		),
-		TextModel: textModel,
+		TextModel: newTextModel(c),
 		VisionModel: newVisionModel(c),
 		ImageProcessor: newImageProcessor(c),
 		MultiModalProjector: newMultiModalProjector(c),
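This hunk removes the error plumbing around the text-model constructor: since newTextModel can no longer fail (see the text-model diff below), it is called inline in the composite literal like the other sub-model constructors. A sketch of the resulting pattern, using placeholder types rather than ollama's actual ones:

package main

import "fmt"

// Placeholder types standing in for the mistral3 sub-models.
type textModel struct{}
type visionModel struct{}

type multimodal struct {
	Text   *textModel
	Vision *visionModel
}

// Once a constructor cannot fail, it composes directly into the
// literal; no intermediate variable or err check is needed.
func newTextModel() *textModel     { return &textModel{} }
func newVisionModel() *visionModel { return &visionModel{} }

func newMultimodal() *multimodal {
	return &multimodal{
		Text:   newTextModel(),
		Vision: newVisionModel(),
	}
}

func main() {
	m := newMultimodal()
	fmt.Println(m.Text != nil, m.Vision != nil) // true true
}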
package mistral3 (text model):

@@ -1,9 +1,8 @@
 package mistral3
 
 import (
-	"fmt"
+	"cmp"
 	"math"
-	"strings"
 
 	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
@@ -37,10 +36,7 @@ type SelfAttention struct {
 func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
 	batchSize := hiddenState.Dim(1)
 	ropeType := uint32(0)
-	headDim := opts.headDim
-	if headDim == 0 {
-		headDim = opts.hiddenSize / opts.numHeads
-	}
+	headDim := cmp.Or(opts.headDim, opts.hiddenSize/opts.numHeads)
 
 	q := sa.Query.Forward(ctx, hiddenState)
 	q = q.Reshape(ctx, headDim, opts.numHeads, batchSize)
@@ -125,12 +121,8 @@ func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor
 	return m.Output.Forward(ctx, hiddenState)
 }
 
-func NewTextModel(c fs.Config) (*TextModel, error) {
-	if !strings.EqualFold(c.String("tokenizer.ggml.model"), "gpt2") {
-		return nil, fmt.Errorf("tokenizer %s not yet supported", c.String("tokenizer.ggml.model"))
-	}
-
-	textModel := &TextModel{
+func newTextModel(c fs.Config) *TextModel {
+	return &TextModel{
 		Layers: make([]Layer, c.Uint("block_count")),
 		TextOptions: &TextOptions{
 			hiddenSize: int(c.Uint("embedding_length")),
@@ -143,6 +135,4 @@ func NewTextModel(c fs.Config) (*TextModel, error) {
 			ropeDim: c.Uint("rope.dimension_count"),
 		},
 	}
-
-	return textModel, nil
 }
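Besides the fmt/strings-to-cmp import swap, two things happen here: the explicit if headDim == 0 fallback becomes the same cmp.Or one-liner used in the llama model, and NewTextModel is renamed to the unexported, infallible newTextModel now that the gpt2 tokenizer check is gone. For ints the two fallback spellings behave identically, since cmp.Or returns its first non-zero argument; a quick check with illustrative function names:

package main

import (
	"cmp"
	"fmt"
)

// fallbackOld reproduces the removed if-block.
func fallbackOld(headDim, hiddenSize, numHeads int) int {
	if headDim == 0 {
		headDim = hiddenSize / numHeads
	}
	return headDim
}

// fallbackNew is the one-liner that replaces it.
func fallbackNew(headDim, hiddenSize, numHeads int) int {
	return cmp.Or(headDim, hiddenSize/numHeads)
}

func main() {
	for _, tc := range []struct{ headDim, hiddenSize, numHeads int }{
		{0, 4096, 32},   // unset: both fall back to 128
		{256, 3072, 24}, // set: both keep 256
	} {
		fmt.Println(
			fallbackOld(tc.headDim, tc.hiddenSize, tc.numHeads),
			fallbackNew(tc.headDim, tc.hiddenSize, tc.numHeads),
		)
	}
}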
package mistral3 (vision model):

@@ -170,7 +170,7 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor) ml.Tensor {
 
 func newVisionModel(c fs.Config) *VisionModel {
 	return &VisionModel{
-		Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count", 24)),
+		Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count")),
 		VisionModelOptions: &VisionModelOptions{
 			hiddenSize: int(c.Uint("vision.embedding_length", 1024)),
 			numHeads: int(c.Uint("vision.attention.head_count", 16)),
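This last hunk is the "do not set default vision layers" part of the commit message: with the old default of 24, a checkpoint carrying no vision metadata would still allocate 24 VisionEncoderLayer entries with no weights behind them, while with the default removed a missing vision.block_count yields zero layers. A sketch of the lookup semantics; uintOr below is an assumption about how a key/value config with optional defaults behaves, not ollama's actual fs.Config:

package main

import "fmt"

// uintOr is a stand-in for a config lookup with an optional default:
// return the stored value if the key exists, else the default, else 0.
func uintOr(kv map[string]uint32, key string, def ...uint32) uint32 {
	if v, ok := kv[key]; ok {
		return v
	}
	if len(def) > 0 {
		return def[0]
	}
	return 0
}

func main() {
	textOnly := map[string]uint32{} // checkpoint with no vision metadata

	// With the old default, a text-only model got 24 phantom layers.
	fmt.Println(uintOr(textOnly, "vision.block_count", 24)) // 24

	// Without it, the missing key correctly yields zero layers.
	fmt.Println(uintOr(textOnly, "vision.block_count")) // 0
}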