convert: fix tensor sorting (#12015)

there are two bugs here.

1. the check for a layer id is incorrect and should be >= 0 since layer
   0 is valid
2. if both tensors have a layer identifier, it will only compare the
   layer id, which will return 0 if the tensors are in the same layer.
   instead it should fall back to comparing the full tensor name
This commit is contained in:
Michael Yang
2025-08-26 13:57:46 -07:00
committed by GitHub
parent 85ccf7354d
commit 86834a2797
3 changed files with 29 additions and 25 deletions

View File

@@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"io" "io"
"log/slog" "log/slog"
"math"
"slices" "slices"
"strings" "strings"
@@ -276,7 +277,7 @@ type Tensor struct {
func (t Tensor) block() (n int) { func (t Tensor) block() (n int) {
if _, err := fmt.Sscanf(t.Name, "blk.%d.", &n); err != nil { if _, err := fmt.Sscanf(t.Name, "blk.%d.", &n); err != nil {
return -1 return math.MaxInt
} }
return return

View File

@@ -533,12 +533,15 @@ func WriteGGUF(f *os.File, kv KV, ts []*Tensor) error {
} }
} }
slices.SortStableFunc(ts, func(a, b *Tensor) int { slices.SortStableFunc(
if i, j := a.block(), b.block(); i > 0 && j > 0 { ts,
return cmp.Compare(i, j) func(a, b *Tensor) int {
} return cmp.Or(
return cmp.Compare(a.Name, b.Name) cmp.Compare(a.block(), b.block()),
}) cmp.Compare(a.Name, b.Name),
)
},
)
var s uint64 var s uint64
for i := range ts { for i := range ts {

View File

@@ -11,24 +11,24 @@ import (
) )
func TestWriteGGUF(t *testing.T) { func TestWriteGGUF(t *testing.T) {
r := rand.New(rand.NewPCG(0, 0)) b := bytes.NewBuffer(make([]byte, 2*3))
for range 8 { for range 8 {
t.Run("shuffle", func(t *testing.T) { t.Run("shuffle", func(t *testing.T) {
t.Parallel() t.Parallel()
ts := []*Tensor{ ts := []*Tensor{
{Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, {Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: b},
{Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, {Name: "blk.0.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
{Name: "blk.1.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, {Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
{Name: "blk.2.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, {Name: "blk.1.ffn_up.weight", Shape: []uint64{2, 3}, WriterTo: b},
{Name: "blk.3.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, {Name: "blk.2.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
{Name: "blk.4.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, {Name: "blk.1.ffn_down.weight", Shape: []uint64{2, 3}, WriterTo: b},
{Name: "blk.5.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))}, {Name: "blk.0.attn_k.weight", Shape: []uint64{2, 3}, WriterTo: b},
{Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewBuffer(make([]byte, 3*2))}, {Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: b},
{Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewBuffer(make([]byte, 3*2))}, {Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: b},
} }
r.Shuffle(len(ts), func(i, j int) { rand.Shuffle(len(ts), func(i, j int) {
ts[i], ts[j] = ts[j], ts[i] ts[i], ts[j] = ts[j], ts[i]
}) })
@@ -63,14 +63,14 @@ func TestWriteGGUF(t *testing.T) {
} }
if diff := cmp.Diff(Tensors{ if diff := cmp.Diff(Tensors{
Offset: 608, Offset: 592,
items: []*Tensor{ items: []*Tensor{
{Name: "blk.0.attn_norm.weight", Offset: 0, Shape: []uint64{2, 3}}, {Name: "blk.0.attn_k.weight", Offset: 0, Shape: []uint64{2, 3}},
{Name: "blk.1.attn_norm.weight", Offset: 32, Shape: []uint64{2, 3}}, {Name: "blk.0.attn_norm.weight", Offset: 32, Shape: []uint64{2, 3}},
{Name: "blk.2.attn_norm.weight", Offset: 64, Shape: []uint64{2, 3}}, {Name: "blk.0.ffn_norm.weight", Offset: 64, Shape: []uint64{2, 3}},
{Name: "blk.3.attn_norm.weight", Offset: 96, Shape: []uint64{2, 3}}, {Name: "blk.1.ffn_down.weight", Offset: 96, Shape: []uint64{2, 3}},
{Name: "blk.4.attn_norm.weight", Offset: 128, Shape: []uint64{2, 3}}, {Name: "blk.1.ffn_up.weight", Offset: 128, Shape: []uint64{2, 3}},
{Name: "blk.5.attn_norm.weight", Offset: 160, Shape: []uint64{2, 3}}, {Name: "blk.2.ffn_norm.weight", Offset: 160, Shape: []uint64{2, 3}},
{Name: "output.weight", Offset: 192, Shape: []uint64{3, 2}}, {Name: "output.weight", Offset: 192, Shape: []uint64{3, 2}},
{Name: "output_norm.weight", Offset: 224, Shape: []uint64{3, 2}}, {Name: "output_norm.weight", Offset: 224, Shape: []uint64{3, 2}},
{Name: "token_embd.weight", Offset: 256, Shape: []uint64{2, 3}}, {Name: "token_embd.weight", Offset: 256, Shape: []uint64{2, 3}},