feat(model): add qwen3vl (#12665)

Author: Michael Yang
Date: 2025-10-28 17:39:47 -07:00
Committed by: GitHub
Parent: 36d64fb531
Commit: 7d25b9e194
22 changed files with 1502 additions and 35 deletions

View File

@@ -161,6 +161,7 @@ type Tensor interface {
AvgPool2D(ctx Context, k, s int, p float32) Tensor
Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
Conv3D(ctx Context, weight Tensor, c, s0, s1, s2, p0, p1, p2, d0, d1, d2 int) Tensor
IM2Col(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
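IM2Col takes the same stride (s0, s1), padding (p0, p1), and dilation (d0, d1) parameters as Conv2D. A hedged sketch of a call site, assuming it follows the convention Conv2D uses in this codebase (method invoked on the kernel tensor, data tensor passed as the argument); kernel and images are illustrative names, not taken from this commit:

// Unroll images into patch columns matching kernel's receptive field,
// e.g. as the first step of a manually expressed convolution.
cols := kernel.IM2Col(ctx, images, 1, 1, 0, 0, 1, 1) // stride 1, no padding, dilation 1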

View File

@@ -1182,6 +1182,10 @@ func (t *Tensor) Concat(ctx ml.Context, t2 ml.Tensor, dim int) ml.Tensor {
}
func (t *Tensor) Contiguous(ctx ml.Context, shape ...int) ml.Tensor {
if slices.Contains(shape, -1) {
inferShape(t, shape)
}
switch len(shape) {
case 0:
return &Tensor{
@@ -1324,7 +1328,43 @@ func (t *Tensor) Copy(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
}
}
// inferShape updates shape in place to automatically set a single -1 dimension
// based on the input tensor and the other dimensions
func inferShape(t *Tensor, shape []int) {
total := 1
for _, dim := range t.Shape() {
total *= dim
}
dim := -1
for i := range shape {
switch shape[i] {
case -1:
if dim != -1 {
panic("only one dimension can be inferred")
}
dim = i
case 0:
panic("dimension cannot be zero")
default:
if total%shape[i] != 0 {
panic("cannot infer dimension")
}
total /= shape[i]
}
}
if dim != -1 {
shape[dim] = total
}
}
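Both Contiguous and Reshape call inferShape when the requested shape contains a -1, so a caller can write, for example, t.Reshape(ctx, 2, -1, 4) on a 24-element tensor and have the -1 resolved to 3. A minimal standalone sketch of the same arithmetic on plain ints (illustrative only, not part of the package):

package main

import "fmt"

// resolveShape mirrors inferShape's rule: replace a single -1 with
// total/product(other dims), panicking on a zero or indivisible dimension.
func resolveShape(total int, shape []int) []int {
	inferred := -1
	for i, d := range shape {
		switch d {
		case -1:
			if inferred != -1 {
				panic("only one dimension can be inferred")
			}
			inferred = i
		case 0:
			panic("dimension cannot be zero")
		default:
			if total%d != 0 {
				panic("cannot infer dimension")
			}
			total /= d
		}
	}
	if inferred != -1 {
		shape[inferred] = total
	}
	return shape
}

func main() {
	fmt.Println(resolveShape(24, []int{2, -1, 4})) // [2 3 4]
	fmt.Println(resolveShape(24, []int{-1}))       // [24]
}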
func (t *Tensor) Reshape(ctx ml.Context, shape ...int) ml.Tensor {
if slices.Contains(shape, -1) {
inferShape(t, shape)
}
switch len(shape) {
case 1:
return &Tensor{
@@ -1537,6 +1577,16 @@ func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int
}
}
func (t *Tensor) Conv3D(ctx ml.Context, t2 ml.Tensor, c, s0, s1, s2, p0, p1, p2, d0, d1, d2 int) ml.Tensor {
var tt ml.Tensor = &Tensor{
b: t.b,
t: C.ggml_conv_3d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int64_t(c), C.int(s0), C.int(s1), C.int(s2), C.int(p0), C.int(p1), C.int(p2), C.int(d0), C.int(d1), C.int(d2)),
}
tt = tt.Reshape(ctx, t.Dim(3)/c, t2.Dim(3)/c)
return tt
}
func (t *Tensor) AvgPool2D(ctx ml.Context, k, s int, p float32) ml.Tensor {
return &Tensor{
b: t.b,

View File

@@ -0,0 +1,126 @@
package ggml
import (
"errors"
"os"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/fs/ggml"
"github.com/ollama/ollama/ml"
)
func setup(tb testing.TB) ml.Context {
tb.Helper()
f, err := os.CreateTemp(tb.TempDir(), "*.bin")
if err != nil {
tb.Fatal(err)
}
defer f.Close()
if err := ggml.WriteGGUF(f, ggml.KV{"general.architecture": "test"}, nil); err != nil {
tb.Fatal(err)
}
b, err := ml.NewBackend(f.Name(), ml.BackendParams{})
if err != nil {
tb.Fatal(err)
}
ctx := b.NewContext().Input()
tb.Cleanup(func() {
ctx.Close()
b.Close()
})
return ctx
}
func TestInferShape(t *testing.T) {
cases := []struct {
name string
input []int
want []int
err error
}{
{
name: "no inferred shape",
input: []int{2, 3, 4},
want: []int{2, 3, 4},
},
{
name: "infer begin",
input: []int{-1, 3, 4},
want: []int{2, 3, 4},
},
{
name: "infer mid",
input: []int{2, -1, 4},
want: []int{2, 3, 4},
},
{
name: "infer end",
input: []int{2, 3, -1},
want: []int{2, 3, 4},
},
{
name: "too many inferred dims",
input: []int{-1, 3, -1},
err: errors.New("only one dimension can be inferred"),
},
{
name: "infer gather",
input: []int{2, -1},
want: []int{2, 12},
},
{
name: "infer gather all",
input: []int{-1},
want: []int{24},
},
{
name: "infer split",
input: []int{2, -1, 3, 2},
want: []int{2, 2, 3, 2},
},
{
name: "indivisible infer",
input: []int{2, -1, 2, 4},
err: errors.New("cannot infer dimension"),
},
{
name: "infer zero dim",
input: []int{2, 0, 4},
err: errors.New("dimension cannot be zero"),
},
}
ctx := setup(t)
tensor, ok := ctx.Empty(ml.DTypeF32, 2, 3, 4).(*Tensor)
if !ok {
t.Fatal("expected *Tensor")
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
defer func() {
if r := recover(); r == nil && tt.err == nil {
// all good
} else if r != nil && tt.err == nil {
t.Errorf("unexpected panic: %v", r)
} else if r == nil && tt.err != nil {
t.Errorf("expected panic but did not get one: %v", tt.err)
} else if errStr, ok := r.(string); ok && errStr != tt.err.Error() {
t.Errorf("expected panic %q but got %q", tt.err.Error(), errStr)
}
}()
inferShape(tensor, tt.input)
if diff := cmp.Diff(tt.want, tt.input); diff != "" {
t.Errorf("%s: shape mismatch (-want +got):\n%s", tt.name, diff)
}
})
}
}
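setup exists only to allocate a real 2×3×4 tensor through the backend; the shape cases themselves are pure arithmetic. To run just this test (the package path below assumes the file lives alongside the ggml backend at ml/backend/ggml):

go test ./ml/backend/ggml -run TestInferShape -v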

View File

@@ -4,8 +4,26 @@ import "github.com/ollama/ollama/ml"
type Conv2D struct {
Weight ml.Tensor `gguf:"weight"`
Bias ml.Tensor `gguf:"bias"`
}
func (m *Conv2D) Forward(ctx ml.Context, t ml.Tensor, s0, s1, p0, p1, d0, d1 int) ml.Tensor {
return m.Weight.Conv2D(ctx, t, s0, s1, p0, p1, d0, d1)
t = m.Weight.Conv2D(ctx, t, s0, s1, p0, p1, d0, d1)
if m.Bias != nil {
t = t.Add(ctx, m.Bias)
}
return t
}
type Conv3D struct {
Weight ml.Tensor `gguf:"weight"`
Bias ml.Tensor `gguf:"bias"`
}
func (m *Conv3D) Forward(ctx ml.Context, t ml.Tensor, c, s0, s1, s2, p0, p1, p2, d0, d1, d2 int) ml.Tensor {
t = m.Weight.Conv3D(ctx, t, c, s0, s1, s2, p0, p1, p2, d0, d1, d2)
if m.Bias != nil {
t = t.Add(ctx, m.Bias)
}
return t
}
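The new Conv3D layer mirrors Conv2D: apply the weight, then add the bias if present. A hedged sketch of how a vision tower's patch embedding might wire it up; the struct, GGUF tag, package name, and hyperparameter names are illustrative assumptions, not taken from the qwen3vl model code in this commit, and the nn import path assumes the convolution layers live with the other layer types:

package model // package name is illustrative

import (
	"github.com/ollama/ollama/ml"
	"github.com/ollama/ollama/ml/nn"
)

// PatchEmbedding is a hypothetical wrapper that projects video pixel values
// into patch embeddings with the new Conv3D module.
type PatchEmbedding struct {
	Proj *nn.Conv3D `gguf:"patch_embd"` // tag name is an assumption
}

// Forward strides the convolution by the spatial and temporal patch sizes,
// with no padding and no dilation. inChannels is forwarded as Conv3D's c
// argument; all values here are placeholders.
func (p *PatchEmbedding) Forward(ctx ml.Context, pixels ml.Tensor, inChannels, patchSize, temporalPatchSize int) ml.Tensor {
	return p.Proj.Forward(ctx, pixels, inChannels,
		patchSize, patchSize, temporalPatchSize, // s0, s1, s2
		0, 0, 0, // p0, p1, p2
		1, 1, 1) // d0, d1, d2
}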