feat(model): add qwen3vl (#12665)

This commit is contained in:
Michael Yang
2025-10-28 17:39:47 -07:00
committed by GitHub
parent 36d64fb531
commit 7d25b9e194
22 changed files with 1502 additions and 35 deletions

View File

@@ -235,15 +235,28 @@ func countCommonPrefix(a []*input.Input, b []*input.Input) int32 {
return count
}
// TODO(jessegross): If we need to reprocess the inputs we should ensure that
// we don't split up a SameBatch
func (c *InputCache) ShiftDiscard(inputLen int32, numKeep int32) int32 {
targetFree := (c.numCtx - numKeep) / 2
targetFree = max(targetFree, 1)
// ShiftDiscard computes how many inputs can be discarded from the cache. Inputs in the same batch
// are discarded together.
func (c *InputCache) ShiftDiscard(inputs []*input.Input, numKeep int32) int32 {
targetFree := max((c.numCtx-numKeep)/2, 1)
currentFree := c.numCtx - int32(len(inputs))
currentFree := c.numCtx - inputLen
var discard, sameBatch int32
for _, input := range inputs[numKeep:] {
if sameBatch <= 0 && currentFree >= targetFree {
break
}
return max(targetFree-currentFree, 0)
sameBatch--
currentFree++
discard++
if input.SameBatch > 0 {
sameBatch = int32(input.SameBatch)
}
}
return discard
}
type ErrReprocessInputs struct {
@@ -264,7 +277,7 @@ func (c *InputCache) ShiftCacheSlot(slot *InputCacheSlot, numKeep int32) error {
}
inputLen := int32(len(slot.Inputs))
discard := c.ShiftDiscard(inputLen, numKeep)
discard := c.ShiftDiscard(slot.Inputs, numKeep)
if discard <= 0 {
return nil

View File

@@ -3,6 +3,7 @@ package ollamarunner
import (
"errors"
"fmt"
"slices"
"testing"
"time"
@@ -238,59 +239,137 @@ func TestShiftDiscard(t *testing.T) {
name string
numCtx int32
numKeep int32
inputLen int32
inputs []*input.Input
expected int32
}{
{
name: "Shift",
numCtx: 2048,
numKeep: 5,
inputLen: 2048,
inputs: slices.Repeat([]*input.Input{{}}, 2048),
expected: 1021,
},
{
name: "Max Keep",
numCtx: 2048,
numKeep: 2047,
inputLen: 2048,
inputs: slices.Repeat([]*input.Input{{}}, 2048),
expected: 1,
},
{
name: "No Keep",
numCtx: 2048,
numKeep: 0,
inputLen: 2048,
inputs: slices.Repeat([]*input.Input{{}}, 2048),
expected: 1024,
},
{
name: "Truncate",
numCtx: 2048,
numKeep: 5,
inputLen: 5000,
inputs: slices.Repeat([]*input.Input{{}}, 5000),
expected: 3973,
},
{
name: "Truncate Keep",
numCtx: 2048,
numKeep: 2047,
inputLen: 5000,
inputs: slices.Repeat([]*input.Input{{}}, 5000),
expected: 2953,
},
{
name: "No Op",
numCtx: 2048,
numKeep: 5,
inputLen: 512,
inputs: slices.Repeat([]*input.Input{{}}, 512),
expected: 0,
},
{
name: "Same Batch",
numCtx: 2048,
numKeep: 5,
inputs: slices.Collect(func(yield func(*input.Input) bool) {
for range 1024 {
if !yield(&input.Input{}) {
return
}
}
if !yield(&input.Input{SameBatch: 512 - 1}) {
return
}
for range 2048 - 1024 - 1 {
if !yield(&input.Input{}) {
return
}
}
}),
expected: 1531,
},
{
name: "Same Batch Near Start",
numCtx: 2048,
numKeep: 5,
inputs: slices.Collect(func(yield func(*input.Input) bool) {
for range 10 {
if !yield(&input.Input{}) {
return
}
}
if !yield(&input.Input{SameBatch: 512 - 1}) {
return
}
for range 2048 - 10 - 1 {
if !yield(&input.Input{}) {
return
}
}
}),
expected: 1021,
},
{
name: "Consecutive Same Batch",
numCtx: 32,
inputs: slices.Collect(func(yield func(*input.Input) bool) {
for i := range 32 {
input := input.Input{}
if i%10 == 0 {
input.SameBatch = 10 - 1
}
if !yield(&input) {
return
}
}
}),
expected: 20,
},
{
name: "Overlapping Same Batch",
numCtx: 32,
inputs: slices.Collect(func(yield func(*input.Input) bool) {
for i := range 32 {
input := input.Input{}
if slices.Contains([]int{4, 8, 14}, i) {
input.SameBatch = 10 - 1
}
if !yield(&input) {
return
}
}
}),
expected: 24,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := InputCache{numCtx: tt.numCtx}
result := c.ShiftDiscard(tt.inputLen, tt.numKeep)
result := c.ShiftDiscard(tt.inputs, tt.numKeep)
if result != tt.expected {
t.Errorf("shiftDiscard(ctx: %v, keep: %v input: %v): have %v; want %v", tt.numCtx, tt.numKeep, tt.inputLen, result, tt.expected)
t.Errorf("shiftDiscard(ctx: %v, keep: %v inputs: %v): have %v; want %v", tt.numCtx, tt.numKeep, len(tt.inputs), result, tt.expected)
}
})
}

View File

@@ -214,7 +214,6 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]*input.Input,
parts = []string{prompt}
}
postTokenize := false
for i, part := range parts {
// text - tokenize
tokens, err := s.model.(model.TextProcessor).Encode(part, i == 0)
@@ -257,11 +256,10 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]*input.Input,
mmStore.addMultimodal(imageEmbeddings)
inputs = append(inputs, &input.Input{Multimodal: imageEmbeddings, MultimodalHash: imageHash})
postTokenize = true
}
}
if visionModel && postTokenize {
if visionModel {
var err error
inputs, err = multimodalProcessor.PostTokenize(inputs)
if err != nil {