fix mllama conversion (#10716)
Cross-attention Q and K projections need to have their heads swapped, similar to the non-cross-attention Q and K tensors.
@@ -139,7 +139,8 @@ func (p *llamaModel) Tensors(ts []Tensor) []*ggml.Tensor {
 	}
 
 	for _, t := range ts {
-		if strings.HasSuffix(t.Name(), "attn_q.weight") || strings.HasSuffix(t.Name(), "attn_k.weight") {
+		if strings.HasSuffix(t.Name(), "attn_q.weight") || strings.HasSuffix(t.Name(), "attn_k.weight") ||
+			strings.HasSuffix(t.Name(), "attn_q_proj.weight") || strings.HasSuffix(t.Name(), "attn_k_proj.weight") {
 			if !p.skipRepack {
 				t.SetRepacker(p.repack)
 			}
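
For illustration, a minimal sketch of what the widened suffix check now selects for repacking. The tensor names and the needsRepack helper below are hypothetical, used only to show the matching behavior; they are not part of the converter.

package main

import (
	"fmt"
	"strings"
)

// needsRepack mirrors the widened condition: the plain attention suffixes and
// the cross-attention *_proj suffixes are both selected for repacking.
func needsRepack(name string) bool {
	return strings.HasSuffix(name, "attn_q.weight") || strings.HasSuffix(name, "attn_k.weight") ||
		strings.HasSuffix(name, "attn_q_proj.weight") || strings.HasSuffix(name, "attn_k_proj.weight")
}

func main() {
	// Hypothetical tensor names for illustration only.
	for _, name := range []string{
		"blk.0.attn_q.weight",             // self-attention Q: matched before and after this change
		"blk.13.cross_attn_q_proj.weight", // cross-attention Q: matched only with the new suffixes
		"blk.13.cross_attn_v_proj.weight", // V projection: never repacked
	} {
		fmt.Println(name, needsRepack(name))
	}
}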
@@ -181,9 +182,9 @@ func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]floa
 	}
 
 	var heads uint32
-	if strings.HasSuffix(name, "attn_q.weight") {
+	if strings.HasSuffix(name, "attn_q.weight") || strings.HasSuffix(name, "attn_q_proj.weight") {
 		heads = p.NumAttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight") {
+	} else if strings.HasSuffix(name, "attn_k.weight") || strings.HasSuffix(name, "attn_k_proj.weight") {
 		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
 	} else {
 		return nil, fmt.Errorf("unknown tensor for repack: %s", name)
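
For context, a minimal, self-contained sketch of the head reordering that a Q/K repack of this kind typically performs for rotary embeddings: within each head, the two halves of the head dimension are interleaved row by row. This assumes a row-major weight of shape [heads*headDim, cols] and a hypothetical permuteQK helper; the converter's actual repack may be implemented differently (for example via a tensor library).

package main

import "fmt"

// permuteQK reorders the rows of a Q or K projection weight so that, within
// each head, rows from the first and second half of the head dimension are
// interleaved. Illustration only; not the converter's implementation.
func permuteQK(data []float32, heads, headDim, cols int) []float32 {
	out := make([]float32, len(data))
	half := headDim / 2
	for h := 0; h < heads; h++ {
		for i := 0; i < half; i++ {
			for j := 0; j < 2; j++ {
				// output row (h, i, j) takes input row (h, j, i)
				dst := (h*headDim + i*2 + j) * cols
				src := (h*headDim + j*half + i) * cols
				copy(out[dst:dst+cols], data[src:src+cols])
			}
		}
	}
	return out
}

func main() {
	// Tiny example: 1 head, headDim 4, 1 column. Rows 0,1,2,3 become 0,2,1,3.
	in := []float32{0, 1, 2, 3}
	fmt.Println(permuteQK(in, 1, 4, 1)) // [0 2 1 3]
}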