feat(model): add qwen3vl (#12665)

2025-12-21 14:26:30 +00:00 · 2025-10-28 17:39:47 -07:00
parent 36d64fb531
commit 7d25b9e194
22 changed files with 1502 additions and 35 deletions
--- a/runner/ollamarunner/cache.go
+++ b/runner/ollamarunner/cache.go
@@ -235,15 +235,28 @@ func countCommonPrefix(a []*input.Input, b []*input.Input) int32 {
 	return count
 }

-// TODO(jessegross): If we need to reprocess the inputs we should ensure that
-// we don't split up a SameBatch
-func (c *InputCache) ShiftDiscard(inputLen int32, numKeep int32) int32 {
-	targetFree := (c.numCtx - numKeep) / 2
-	targetFree = max(targetFree, 1)
-
-	currentFree := c.numCtx - inputLen
-
-	return max(targetFree-currentFree, 0)
+// ShiftDiscard reports how many inputs following the first numKeep should be dropped so that
+// roughly half of the non-kept context, (numCtx-numKeep)/2 and at least one slot, is free.
+// An input with SameBatch > 0 extends the discard over that many following inputs, so a
+// batch is never split. A numKeep outside [0, len(inputs)] is clamped instead of panicking.
+func (c *InputCache) ShiftDiscard(inputs []*input.Input, numKeep int32) int32 {
+	targetFree := max((c.numCtx-numKeep)/2, 1)
+	currentFree := c.numCtx - int32(len(inputs))
+	start := min(max(numKeep, 0), int32(len(inputs)))
+
+	var discard, pending int32 // pending: inputs still owed to the batch being discarded
+	for _, in := range inputs[start:] {
+		if pending <= 0 && currentFree >= targetFree {
+			break
+		}
+
+		pending--
+		currentFree++
+		discard++
+		// Keep the longest outstanding run; a nested marker must not shorten it.
+		pending = max(pending, int32(in.SameBatch))
+	}
+	return discard
 }

 type ErrReprocessInputs struct {
@@ -264,7 +277,7 @@ func (c *InputCache) ShiftCacheSlot(slot *InputCacheSlot, numKeep int32) error {
 	}

 	inputLen := int32(len(slot.Inputs))
-	discard := c.ShiftDiscard(inputLen, numKeep)
+	discard := c.ShiftDiscard(slot.Inputs, numKeep)

 	if discard <= 0 {
 		return nil
--- a/runner/ollamarunner/cache_test.go
+++ b/runner/ollamarunner/cache_test.go
@@ -3,6 +3,7 @@ package ollamarunner
 import (
 	"errors"
 	"fmt"
+	"slices"
 	"testing"
 	"time"

@@ -238,59 +239,137 @@ func TestShiftDiscard(t *testing.T) {
 		name     string
 		numCtx   int32
 		numKeep  int32
-		inputLen int32
+		inputs   []*input.Input
 		expected int32
 	}{
 		{
 			name:     "Shift",
 			numCtx:   2048,
 			numKeep:  5,
-			inputLen: 2048,
+			inputs:   slices.Repeat([]*input.Input{{}}, 2048),
 			expected: 1021,
 		},
 		{
 			name:     "Max Keep",
 			numCtx:   2048,
 			numKeep:  2047,
-			inputLen: 2048,
+			inputs:   slices.Repeat([]*input.Input{{}}, 2048),
 			expected: 1,
 		},
 		{
 			name:     "No Keep",
 			numCtx:   2048,
 			numKeep:  0,
-			inputLen: 2048,
+			inputs:   slices.Repeat([]*input.Input{{}}, 2048),
 			expected: 1024,
 		},
 		{
 			name:     "Truncate",
 			numCtx:   2048,
 			numKeep:  5,
-			inputLen: 5000,
+			inputs:   slices.Repeat([]*input.Input{{}}, 5000),
 			expected: 3973,
 		},
 		{
 			name:     "Truncate Keep",
 			numCtx:   2048,
 			numKeep:  2047,
-			inputLen: 5000,
+			inputs:   slices.Repeat([]*input.Input{{}}, 5000),
 			expected: 2953,
 		},
 		{
 			name:     "No Op",
 			numCtx:   2048,
 			numKeep:  5,
-			inputLen: 512,
+			inputs:   slices.Repeat([]*input.Input{{}}, 512),
 			expected: 0,
 		},
+		{
+			name:    "Same Batch",
+			numCtx:  2048,
+			numKeep: 5,
+			inputs: slices.Collect(func(yield func(*input.Input) bool) {
+				for range 1024 {
+					if !yield(&input.Input{}) {
+						return
+					}
+				}
+
+				if !yield(&input.Input{SameBatch: 512 - 1}) {
+					return
+				}
+
+				for range 2048 - 1024 - 1 {
+					if !yield(&input.Input{}) {
+						return
+					}
+				}
+			}),
+			expected: 1531,
+		},
+		{
+			name:    "Same Batch Near Start",
+			numCtx:  2048,
+			numKeep: 5,
+			inputs: slices.Collect(func(yield func(*input.Input) bool) {
+				for range 10 {
+					if !yield(&input.Input{}) {
+						return
+					}
+				}
+
+				if !yield(&input.Input{SameBatch: 512 - 1}) {
+					return
+				}
+
+				for range 2048 - 10 - 1 {
+					if !yield(&input.Input{}) {
+						return
+					}
+				}
+			}),
+			expected: 1021,
+		},
+		{
+			name:   "Consecutive Same Batch",
+			numCtx: 32,
+			inputs: slices.Collect(func(yield func(*input.Input) bool) {
+				for i := range 32 {
+					input := input.Input{}
+					if i%10 == 0 {
+						input.SameBatch = 10 - 1
+					}
+					if !yield(&input) {
+						return
+					}
+				}
+			}),
+			expected: 20,
+		},
+		{
+			name:   "Overlapping Same Batch",
+			numCtx: 32,
+			inputs: slices.Collect(func(yield func(*input.Input) bool) {
+				for i := range 32 {
+					input := input.Input{}
+					if slices.Contains([]int{4, 8, 14}, i) {
+						input.SameBatch = 10 - 1
+					}
+					if !yield(&input) {
+						return
+					}
+				}
+			}),
+			expected: 24,
+		},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			c := InputCache{numCtx: tt.numCtx}
-			result := c.ShiftDiscard(tt.inputLen, tt.numKeep)
+			result := c.ShiftDiscard(tt.inputs, tt.numKeep)
 			if result != tt.expected {
-				t.Errorf("shiftDiscard(ctx: %v, keep: %v input: %v): have %v; want %v", tt.numCtx, tt.numKeep, tt.inputLen, result, tt.expected)
+				t.Errorf("shiftDiscard(ctx: %v, keep: %v inputs: %v): have %v; want %v", tt.numCtx, tt.numKeep, len(tt.inputs), result, tt.expected)
 			}
 		})
 	}
--- a/runner/ollamarunner/runner.go
+++ b/runner/ollamarunner/runner.go
@@ -214,7 +214,6 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]*input.Input,
 		parts = []string{prompt}
 	}

-	postTokenize := false
 	for i, part := range parts {
 		// text - tokenize
 		tokens, err := s.model.(model.TextProcessor).Encode(part, i == 0)
@@ -257,11 +256,10 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]*input.Input,
 			mmStore.addMultimodal(imageEmbeddings)

 			inputs = append(inputs, &input.Input{Multimodal: imageEmbeddings, MultimodalHash: imageHash})
-			postTokenize = true
 		}
 	}

-	if visionModel && postTokenize {
+	if visionModel {
 		var err error
 		inputs, err = multimodalProcessor.PostTokenize(inputs)
 		if err != nil {