From 86834a279781d04d701babd3f14f36be9cc961e5 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Tue, 26 Aug 2025 13:57:46 -0700 Subject: [PATCH] convert: fix tensor sorting (#12015) there are two bugs here. 1. the check for a layer id is incorrect and should be >= 0 since layer 0 is valid 2. if both tensors have a layer identifier, it will only compare the layer id which will return 0 if the tensors are in the same layer. instead it should fall back to comparing the full tensor name --- fs/ggml/ggml.go | 3 ++- fs/ggml/gguf.go | 15 +++++++++------ fs/ggml/gguf_test.go | 36 ++++++++++++++++++------------------ 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go index dca0187b..feba55ed 100644 --- a/fs/ggml/ggml.go +++ b/fs/ggml/ggml.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "log/slog" + "math" "slices" "strings" @@ -276,7 +277,7 @@ type Tensor struct { func (t Tensor) block() (n int) { if _, err := fmt.Sscanf(t.Name, "blk.%d.", &n); err != nil { - return -1 + return math.MaxInt } return diff --git a/fs/ggml/gguf.go b/fs/ggml/gguf.go index 413eab5e..fa613ca4 100644 --- a/fs/ggml/gguf.go +++ b/fs/ggml/gguf.go @@ -533,12 +533,15 @@ func WriteGGUF(f *os.File, kv KV, ts []*Tensor) error { } } - slices.SortStableFunc(ts, func(a, b *Tensor) int { - if i, j := a.block(), b.block(); i > 0 && j > 0 { - return cmp.Compare(i, j) - } - return cmp.Compare(a.Name, b.Name) - }) + slices.SortStableFunc( + ts, + func(a, b *Tensor) int { + return cmp.Or( + cmp.Compare(a.block(), b.block()), + cmp.Compare(a.Name, b.Name), + ) + }, + ) var s uint64 for i := range ts { diff --git a/fs/ggml/gguf_test.go b/fs/ggml/gguf_test.go index bf767918..e56bab8d 100644 --- a/fs/ggml/gguf_test.go +++ b/fs/ggml/gguf_test.go @@ -11,24 +11,24 @@ import ( ) func TestWriteGGUF(t *testing.T) { - r := rand.New(rand.NewPCG(0, 0)) + b := bytes.NewBuffer(make([]byte, 2*3)) for range 8 { t.Run("shuffle", func(t *testing.T) { t.Parallel() ts := []*Tensor{ {Name: 
"token_embd.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, - {Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, - {Name: "blk.1.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, - {Name: "blk.2.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, - {Name: "blk.3.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, - {Name: "blk.4.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, - {Name: "blk.5.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, - {Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewBuffer(make([]byte, 3*2))}, - {Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewBuffer(make([]byte, 3*2))}, + {Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: b}, + {Name: "blk.0.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b}, + {Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b}, + {Name: "blk.1.ffn_up.weight", Shape: []uint64{2, 3}, WriterTo: b}, + {Name: "blk.2.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b}, + {Name: "blk.1.ffn_down.weight", Shape: []uint64{2, 3}, WriterTo: b}, + {Name: "blk.0.attn_k.weight", Shape: []uint64{2, 3}, WriterTo: b}, + {Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: b}, + {Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: b}, } - r.Shuffle(len(ts), func(i, j int) { + rand.Shuffle(len(ts), func(i, j int) { ts[i], ts[j] = ts[j], ts[i] }) @@ -63,14 +63,14 @@ func TestWriteGGUF(t *testing.T) { } if diff := cmp.Diff(Tensors{ - Offset: 608, + Offset: 592, items: []*Tensor{ - {Name: "blk.0.attn_norm.weight", Offset: 0, Shape: []uint64{2, 3}}, - {Name: "blk.1.attn_norm.weight", Offset: 32, Shape: []uint64{2, 3}}, - {Name: "blk.2.attn_norm.weight", Offset: 64, Shape: []uint64{2, 3}}, - 
{Name: "blk.3.attn_norm.weight", Offset: 96, Shape: []uint64{2, 3}}, - {Name: "blk.4.attn_norm.weight", Offset: 128, Shape: []uint64{2, 3}}, - {Name: "blk.5.attn_norm.weight", Offset: 160, Shape: []uint64{2, 3}}, + {Name: "blk.0.attn_k.weight", Offset: 0, Shape: []uint64{2, 3}}, + {Name: "blk.0.attn_norm.weight", Offset: 32, Shape: []uint64{2, 3}}, + {Name: "blk.0.ffn_norm.weight", Offset: 64, Shape: []uint64{2, 3}}, + {Name: "blk.1.ffn_down.weight", Offset: 96, Shape: []uint64{2, 3}}, + {Name: "blk.1.ffn_up.weight", Offset: 128, Shape: []uint64{2, 3}}, + {Name: "blk.2.ffn_norm.weight", Offset: 160, Shape: []uint64{2, 3}}, {Name: "output.weight", Offset: 192, Shape: []uint64{3, 2}}, {Name: "output_norm.weight", Offset: 224, Shape: []uint64{3, 2}}, {Name: "token_embd.weight", Offset: 256, Shape: []uint64{2, 3}},