mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-22 06:43:57 +00:00
convert: fix tensor sorting (#12015)
There are two bugs here. 1. The check for a layer id is incorrect and should be >= 0, since layer 0 is valid. 2. If both tensors have a layer identifier, it will only compare the layer id, which will return 0 if the tensors are in the same layer. Instead, it should fall back to comparing the full tensor name.
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"math"
|
||||||
"slices"
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@@ -276,7 +277,7 @@ type Tensor struct {
|
|||||||
|
|
||||||
func (t Tensor) block() (n int) {
|
func (t Tensor) block() (n int) {
|
||||||
if _, err := fmt.Sscanf(t.Name, "blk.%d.", &n); err != nil {
|
if _, err := fmt.Sscanf(t.Name, "blk.%d.", &n); err != nil {
|
||||||
return -1
|
return math.MaxInt
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -533,12 +533,15 @@ func WriteGGUF(f *os.File, kv KV, ts []*Tensor) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
slices.SortStableFunc(ts, func(a, b *Tensor) int {
|
slices.SortStableFunc(
|
||||||
if i, j := a.block(), b.block(); i > 0 && j > 0 {
|
ts,
|
||||||
return cmp.Compare(i, j)
|
func(a, b *Tensor) int {
|
||||||
}
|
return cmp.Or(
|
||||||
return cmp.Compare(a.Name, b.Name)
|
cmp.Compare(a.block(), b.block()),
|
||||||
})
|
cmp.Compare(a.Name, b.Name),
|
||||||
|
)
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
var s uint64
|
var s uint64
|
||||||
for i := range ts {
|
for i := range ts {
|
||||||
|
|||||||
@@ -11,24 +11,24 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestWriteGGUF(t *testing.T) {
|
func TestWriteGGUF(t *testing.T) {
|
||||||
r := rand.New(rand.NewPCG(0, 0))
|
b := bytes.NewBuffer(make([]byte, 2*3))
|
||||||
for range 8 {
|
for range 8 {
|
||||||
t.Run("shuffle", func(t *testing.T) {
|
t.Run("shuffle", func(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
ts := []*Tensor{
|
ts := []*Tensor{
|
||||||
{Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
{Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: b},
|
||||||
{Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
{Name: "blk.0.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
|
||||||
{Name: "blk.1.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
{Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
|
||||||
{Name: "blk.2.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
{Name: "blk.1.ffn_up.weight", Shape: []uint64{2, 3}, WriterTo: b},
|
||||||
{Name: "blk.3.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
{Name: "blk.2.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
|
||||||
{Name: "blk.4.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
{Name: "blk.1.ffn_down.weight", Shape: []uint64{2, 3}, WriterTo: b},
|
||||||
{Name: "blk.5.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
{Name: "blk.0.attn_k.weight", Shape: []uint64{2, 3}, WriterTo: b},
|
||||||
{Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewBuffer(make([]byte, 3*2))},
|
{Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: b},
|
||||||
{Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewBuffer(make([]byte, 3*2))},
|
{Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: b},
|
||||||
}
|
}
|
||||||
|
|
||||||
r.Shuffle(len(ts), func(i, j int) {
|
rand.Shuffle(len(ts), func(i, j int) {
|
||||||
ts[i], ts[j] = ts[j], ts[i]
|
ts[i], ts[j] = ts[j], ts[i]
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -63,14 +63,14 @@ func TestWriteGGUF(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if diff := cmp.Diff(Tensors{
|
if diff := cmp.Diff(Tensors{
|
||||||
Offset: 608,
|
Offset: 592,
|
||||||
items: []*Tensor{
|
items: []*Tensor{
|
||||||
{Name: "blk.0.attn_norm.weight", Offset: 0, Shape: []uint64{2, 3}},
|
{Name: "blk.0.attn_k.weight", Offset: 0, Shape: []uint64{2, 3}},
|
||||||
{Name: "blk.1.attn_norm.weight", Offset: 32, Shape: []uint64{2, 3}},
|
{Name: "blk.0.attn_norm.weight", Offset: 32, Shape: []uint64{2, 3}},
|
||||||
{Name: "blk.2.attn_norm.weight", Offset: 64, Shape: []uint64{2, 3}},
|
{Name: "blk.0.ffn_norm.weight", Offset: 64, Shape: []uint64{2, 3}},
|
||||||
{Name: "blk.3.attn_norm.weight", Offset: 96, Shape: []uint64{2, 3}},
|
{Name: "blk.1.ffn_down.weight", Offset: 96, Shape: []uint64{2, 3}},
|
||||||
{Name: "blk.4.attn_norm.weight", Offset: 128, Shape: []uint64{2, 3}},
|
{Name: "blk.1.ffn_up.weight", Offset: 128, Shape: []uint64{2, 3}},
|
||||||
{Name: "blk.5.attn_norm.weight", Offset: 160, Shape: []uint64{2, 3}},
|
{Name: "blk.2.ffn_norm.weight", Offset: 160, Shape: []uint64{2, 3}},
|
||||||
{Name: "output.weight", Offset: 192, Shape: []uint64{3, 2}},
|
{Name: "output.weight", Offset: 192, Shape: []uint64{3, 2}},
|
||||||
{Name: "output_norm.weight", Offset: 224, Shape: []uint64{3, 2}},
|
{Name: "output_norm.weight", Offset: 224, Shape: []uint64{3, 2}},
|
||||||
{Name: "token_embd.weight", Offset: 256, Shape: []uint64{2, 3}},
|
{Name: "token_embd.weight", Offset: 256, Shape: []uint64{2, 3}},
|
||||||
|
|||||||
Reference in New Issue
Block a user