llama4

commit f0c66e6dea (parent 54055a6dae), committed by Michael Yang
@@ -173,6 +173,8 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
     switch p.Architectures[0] {
     case "LlamaForCausalLM":
         conv = &llamaModel{}
+    case "Llama4ForConditionalGeneration":
+        conv = &llama4Model{}
     case "Mistral3ForConditionalGeneration":
         conv = &mistral3Model{}
     case "MixtralForCausalLM":
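For context, a minimal standalone sketch (not part of this commit) of how the dispatch above behaves: the converter reads the "architectures" list from the model's config.json and picks a converter implementation by its first entry, so a checkpoint declaring Llama4ForConditionalGeneration now maps to the new llama4Model. The params struct and printed strings below are stand-ins for illustration only.

package main

import (
    "encoding/json"
    "fmt"
)

// params is a stand-in for the converter's model parameters; only the
// "architectures" field from config.json matters for this sketch.
type params struct {
    Architectures []string `json:"architectures"`
}

func main() {
    raw := []byte(`{"architectures": ["Llama4ForConditionalGeneration"]}`)

    var p params
    if err := json.Unmarshal(raw, &p); err != nil {
        panic(err)
    }

    switch p.Architectures[0] {
    case "LlamaForCausalLM":
        fmt.Println("convert with llamaModel")
    case "Llama4ForConditionalGeneration":
        fmt.Println("convert with llama4Model") // the case added by this commit
    default:
        fmt.Println("unsupported architecture")
    }
}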
@@ -42,6 +42,8 @@ type llamaModel struct {
     LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
     NormEpsilon      float32 `json:"norm_epsilon"`
     HeadDim          uint32  `json:"head_dim"`
+
+    skipRepack bool
 }
 
 var _ ModelConverter = (*llamaModel)(nil)
@@ -70,6 +72,10 @@ func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
         kv["llama.rope.dimension_count"] = p.HiddenSize / headCount
     }
 
+    if p.HeadDim > 0 {
+        kv["llama.attention.head_dim"] = p.HeadDim
+    }
+
     if p.RopeTheta > 0 {
         kv["llama.rope.freq_base"] = p.RopeTheta
     }
@@ -133,9 +139,10 @@ func (p *llamaModel) Tensors(ts []Tensor) []ggml.Tensor {
     }
 
     for _, t := range ts {
-        if strings.HasSuffix(t.Name(), "attn_q.weight") ||
-            strings.HasSuffix(t.Name(), "attn_k.weight") {
-            t.SetRepacker(p.repack)
+        if strings.HasSuffix(t.Name(), "attn_q.weight") || strings.HasSuffix(t.Name(), "attn_k.weight") {
+            if !p.skipRepack {
+                t.SetRepacker(p.repack)
+            }
         }
 
         out = append(out, ggml.Tensor{
convert/convert_llama4.go (new file, 167 lines)
@@ -0,0 +1,167 @@
package convert

import (
    "slices"
    "strings"

    "github.com/pdevine/tensor"
    "github.com/pdevine/tensor/native"

    "github.com/ollama/ollama/fs/ggml"
)

type llama4Model struct {
    ModelParameters
    TextModel struct {
        llamaModel
        NumExpertsPerToken     uint32 `json:"num_experts_per_tok"`
        NumLocalExperts        uint32 `json:"num_local_experts"`
        InterleaveMOELayerStep uint32 `json:"interleave_moe_layer_step"`
        UseQKNorm              bool   `json:"use_qk_norm"`
        IntermediateSizeMLP    uint32 `json:"intermediate_size_mlp"`
    } `json:"text_config"`
    VisionModel struct {
        NumHiddenLayers   uint32  `json:"num_hidden_layers"`
        HiddenSize        uint32  `json:"hidden_size"`
        IntermediateSize  uint32  `json:"intermediate_size"`
        NumAttentionHeads uint32  `json:"num_attention_heads"`
        ImageSize         uint32  `json:"image_size"`
        PatchSize         uint32  `json:"patch_size"`
        RopeTheta         float32 `json:"rope_theta"`
        NormEpsilon       float32 `json:"norm_eps"`
        PixelShuffleRatio float32 `json:"pixel_shuffle_ratio"`
    } `json:"vision_config"`
}

// KV implements ModelConverter.
func (p *llama4Model) KV(t *Tokenizer) ggml.KV {
    kv := p.ModelParameters.KV(t)
    kv["general.architecture"] = "llama4"

    for k, v := range p.TextModel.KV(t) {
        if strings.HasPrefix(k, "llama.") {
            kv[strings.ReplaceAll(k, "llama.", "llama4.")] = v
        }
    }

    kv["llama4.intermediate_size"] = p.TextModel.IntermediateSizeMLP
    kv["llama4.intermediate_size_moe"] = p.TextModel.IntermediateSize

    kv["llama4.expert_count"] = p.TextModel.NumLocalExperts
    kv["llama4.expert_used_count"] = p.TextModel.NumExpertsPerToken
    kv["llama4.interleave_moe_layer_step"] = p.TextModel.InterleaveMOELayerStep
    kv["llama4.use_qk_norm"] = p.TextModel.UseQKNorm

    kv["llama4.vision.block_count"] = p.VisionModel.NumHiddenLayers
    kv["llama4.vision.embedding_length"] = p.VisionModel.HiddenSize
    kv["llama4.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
    kv["llama4.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
    kv["llama4.vision.image_size"] = p.VisionModel.ImageSize
    kv["llama4.vision.patch_size"] = p.VisionModel.PatchSize
    kv["llama4.vision.rope.freq_base"] = p.VisionModel.RopeTheta
    kv["llama4.vision.layer_norm_epsilon"] = p.VisionModel.NormEpsilon
    kv["llama4.vision.pixel_shuffle_ratio"] = p.VisionModel.PixelShuffleRatio
    return kv
}

// Replacements implements ModelConverter.
func (p *llama4Model) Replacements() []string {
    return append(
        p.TextModel.Replacements(),
        "language_model.", "",
        "vision_model", "v",
        "multi_modal_projector", "mm",
        "feed_forward.down_proj", "ffn_down",
        "feed_forward.up_proj", "ffn_up",
        "feed_forward.gate_proj", "ffn_gate",
        "feed_forward.", "ffn_",
        "shared_expert.down_proj", "down_shexp",
        "shared_expert.gate_proj", "gate_shexp",
        "shared_expert.up_proj", "up_shexp",
        "experts.down_proj", "down_exps.weight",
        "experts.gate_up_proj", "gate_up_exps.weight",
        "router", "gate_inp",
        "patch_embedding.linear", "patch_embedding",
    )
}

// Tensors implements ModelConverter.
func (p *llama4Model) Tensors(ts []Tensor) []ggml.Tensor {
    var out []ggml.Tensor

    var textTensors []Tensor
    for _, t := range ts {
        if strings.HasPrefix(t.Name(), "v.") || strings.HasPrefix(t.Name(), "mm.") {
            out = append(out, ggml.Tensor{
                Name:     t.Name(),
                Kind:     t.Kind(),
                Shape:    t.Shape(),
                WriterTo: t,
            })
        } else if strings.Contains(t.Name(), "ffn_gate_up_exps") {
            // gate and up projectors are fused
            // dims[1], dims[2] must be swapped
            // [experts, hidden_size, intermediate_size * 2] --> [experts, intermediate_size, hidden_size]
            halfDim := int(t.Shape()[2]) / 2

            newShape := slices.Clone(t.Shape())
            newShape[1], newShape[2] = newShape[2]/2, newShape[1]
            for i, name := range []string{"ffn_gate_exps", "ffn_up_exps"} {
                // clone tensor since we need separate repackers
                tt := t.Clone()
                tt.SetRepacker(p.repack(nil, nil, tensor.S(i*halfDim, (i+1)*halfDim)))
                out = append(out, ggml.Tensor{
                    Name:     strings.ReplaceAll(tt.Name(), "ffn_gate_up_exps", name),
                    Kind:     tt.Kind(),
                    Shape:    newShape,
                    WriterTo: tt,
                })
            }
        } else if strings.Contains(t.Name(), "ffn_down_exps") {
            // dims[1], dims[2] must be swapped
            // [experts, intermediate_size, hidden_size] --> [experts, hidden_size, intermediate_size]
            t.SetRepacker(p.repack())
            newShape := slices.Clone(t.Shape())
            newShape[1], newShape[2] = newShape[2], newShape[1]
            out = append(out, ggml.Tensor{
                Name:     t.Name(),
                Kind:     t.Kind(),
                Shape:    newShape,
                WriterTo: t,
            })
        } else {
            textTensors = append(textTensors, t)
        }
    }

    p.TextModel.skipRepack = true
    out = append(out, p.TextModel.Tensors(textTensors)...)
    return out
}

func (p *llama4Model) repack(slice ...tensor.Slice) Repacker {
    return func(name string, data []float32, shape []uint64) ([]float32, error) {
        dims := make([]int, len(shape))
        for i, dim := range shape {
            dims[i] = int(dim)
        }

        var t tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
        t, err := t.Slice(slice...)
        if err != nil {
            return nil, err
        }

        if err := t.T(0, 2, 1); err != nil {
            return nil, err
        }

        t = tensor.Materialize(t)
        // flatten tensor so it can be returned as a vector
        if err := t.Reshape(t.Shape().TotalSize()); err != nil {
            return nil, err
        }

        return native.VectorF32(t.(*tensor.Dense))
    }
}
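As a sanity check on the repack helper above, here is a small self-contained sketch (not part of the commit) that applies the same github.com/pdevine/tensor calls to a tiny made-up fused tensor: slice off one half of the last dimension, swap dims 1 and 2, then flatten. The [2, 3, 4] shape and the gate/up wording are illustrative only.

package main

import (
    "fmt"

    "github.com/pdevine/tensor"
    "github.com/pdevine/tensor/native"
)

func main() {
    // Made-up fused gate/up tensor: 2 experts, hidden_size 3, intermediate_size*2 = 4.
    data := make([]float32, 2*3*4)
    for i := range data {
        data[i] = float32(i)
    }

    var t tensor.Tensor = tensor.New(tensor.WithShape(2, 3, 4), tensor.WithBacking(data))

    // Keep the first half of dim 2 (the "gate" half), as repack does with tensor.S(0, halfDim).
    t, err := t.Slice(nil, nil, tensor.S(0, 2))
    if err != nil {
        panic(err)
    }

    // Swap dims 1 and 2, matching t.T(0, 2, 1) in repack.
    if err := t.T(0, 2, 1); err != nil {
        panic(err)
    }
    t = tensor.Materialize(t)
    fmt.Println(t.Shape()) // (2, 2, 3): [experts, intermediate_size/2, hidden_size]

    // Flatten to a vector, as repack does before handing the data back to the writer.
    if err := t.Reshape(t.Shape().TotalSize()); err != nil {
        panic(err)
    }
    v, err := native.VectorF32(t.(*tensor.Dense))
    if err != nil {
        panic(err)
    }
    fmt.Println(len(v)) // 12
}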
@@ -11,14 +11,15 @@ type Tensor interface {
     Name() string
     Shape() []uint64
     Kind() uint32
-    SetRepacker(repacker)
+    SetRepacker(Repacker)
     WriteTo(io.Writer) (int64, error)
+    Clone() Tensor
 }
 
 type tensorBase struct {
-    name  string
-    shape []uint64
-    repacker
+    name     string
+    shape    []uint64
+    repacker Repacker
 }
 
 func (t tensorBase) Name() string {
@@ -36,7 +37,8 @@ const (
 
 func (t tensorBase) Kind() uint32 {
     if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
-        t.name == "token_types.weight" {
+        t.name == "token_types.weight" ||
+        t.name == "v.positional_embedding_vlm" {
         // these tensors are always F32
         return 0
     }
@@ -51,11 +53,11 @@ func (t tensorBase) Kind() uint32 {
     }
 }
 
-func (t *tensorBase) SetRepacker(fn repacker) {
+func (t *tensorBase) SetRepacker(fn Repacker) {
     t.repacker = fn
 }
 
-type repacker func(string, []float32, []uint64) ([]float32, error)
+type Repacker func(string, []float32, []uint64) ([]float32, error)
 
 func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
     patterns := []struct {
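Since the repacker type is now exported, a short hypothetical sketch of a conforming function may help; it assumes the Repacker type from the hunk above, and scaleRepacker is a made-up name, not part of the commit. A Repacker receives the tensor name, its float32 data and its shape, and returns the data that should be written instead.

// scaleRepacker is a hypothetical example of a function matching the Repacker
// signature above: it leaves the shape alone and just scales every element.
func scaleRepacker(scale float32) Repacker {
    return func(name string, data []float32, shape []uint64) ([]float32, error) {
        out := make([]float32, len(data))
        for i, v := range data {
            out[i] = v * scale
        }
        return out, nil
    }
}

// Usage against any Tensor t (see SetRepacker above): t.SetRepacker(scaleRepacker(0.5))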
@@ -94,6 +94,21 @@ type safetensor struct {
     *tensorBase
 }
 
+func (st safetensor) Clone() Tensor {
+    return &safetensor{
+        fs:     st.fs,
+        path:   st.path,
+        dtype:  st.dtype,
+        offset: st.offset,
+        size:   st.size,
+        tensorBase: &tensorBase{
+            name:     st.name,
+            repacker: st.repacker,
+            shape:    slices.Clone(st.shape),
+        },
+    }
+}
+
 func (st safetensor) WriteTo(w io.Writer) (int64, error) {
     f, err := st.fs.Open(st.path)
     if err != nil {
@@ -43,6 +43,17 @@ type torch struct {
     *tensorBase
 }
 
+func (t torch) Clone() Tensor {
+    return torch{
+        storage: t.storage,
+        tensorBase: &tensorBase{
+            name:     t.name,
+            shape:    t.shape,
+            repacker: t.repacker,
+        },
+    }
+}
+
 func (pt torch) WriteTo(w io.Writer) (int64, error) {
     return 0, nil
 }