feat(model): add qwen3vl (#12665)
@@ -235,15 +235,28 @@ func countCommonPrefix(a []*input.Input, b []*input.Input) int32 {
 	return count
 }
 
-// TODO(jessegross): If we need to reprocess the inputs we should ensure that
-// we don't split up a SameBatch
-func (c *InputCache) ShiftDiscard(inputLen int32, numKeep int32) int32 {
-	targetFree := (c.numCtx - numKeep) / 2
-	targetFree = max(targetFree, 1)
+// ShiftDiscard computes how many inputs can be discarded from the cache. Inputs in the same batch
+// are discarded together.
+func (c *InputCache) ShiftDiscard(inputs []*input.Input, numKeep int32) int32 {
+	targetFree := max((c.numCtx-numKeep)/2, 1)
+	currentFree := c.numCtx - int32(len(inputs))
 
-	currentFree := c.numCtx - inputLen
+	var discard, sameBatch int32
+	for _, input := range inputs[numKeep:] {
+		if sameBatch <= 0 && currentFree >= targetFree {
+			break
+		}
 
-	return max(targetFree-currentFree, 0)
+		sameBatch--
+		currentFree++
+		discard++
+
+		if input.SameBatch > 0 {
+			sameBatch = int32(input.SameBatch)
+		}
+	}
+
+	return discard
 }
 
 type ErrReprocessInputs struct {
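
For reference, the rewrite replaces the closed-form max(targetFree-currentFree, 0) with a walk over the live inputs so that a SameBatch group is never cut in half. Below is a self-contained sketch of that walk, assuming a pared-down Input type with only the SameBatch field; the names shiftDiscard and Input here are illustrative, not the repository's own.

package main

import "fmt"

type Input struct {
	// SameBatch marks how many of the following inputs must be
	// processed in the same batch as this one.
	SameBatch int
}

// shiftDiscard frees at least half of the non-kept context, extending the
// discard past targetFree whenever stopping early would split an open
// SameBatch group.
func shiftDiscard(numCtx, numKeep int32, inputs []*Input) int32 {
	targetFree := max((numCtx-numKeep)/2, 1)
	currentFree := numCtx - int32(len(inputs))

	var discard, sameBatch int32
	for _, in := range inputs[numKeep:] {
		// Stop once enough space is free and no batch group is open.
		if sameBatch <= 0 && currentFree >= targetFree {
			break
		}

		sameBatch--
		currentFree++
		discard++

		if in.SameBatch > 0 {
			sameBatch = int32(in.SameBatch)
		}
	}

	return discard
}

func main() {
	// A full 2048-token context with one SameBatch group of 512 starting
	// at index 1024 (the "Same Batch" test case below): the walk
	// overshoots the 1021-input target to finish the group.
	inputs := make([]*Input, 2048)
	for i := range inputs {
		inputs[i] = &Input{}
	}
	inputs[1024].SameBatch = 512 - 1
	fmt.Println(shiftDiscard(2048, 5, inputs)) // 1531
}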
@@ -264,7 +277,7 @@ func (c *InputCache) ShiftCacheSlot(slot *InputCacheSlot, numKeep int32) error {
 	}
 
 	inputLen := int32(len(slot.Inputs))
-	discard := c.ShiftDiscard(inputLen, numKeep)
+	discard := c.ShiftDiscard(slot.Inputs, numKeep)
 
 	if discard <= 0 {
 		return nil
@@ -3,6 +3,7 @@ package ollamarunner
 import (
 	"errors"
 	"fmt"
+	"slices"
 	"testing"
 	"time"
 
@@ -238,59 +239,137 @@ func TestShiftDiscard(t *testing.T) {
 		name     string
 		numCtx   int32
 		numKeep  int32
-		inputLen int32
+		inputs   []*input.Input
 		expected int32
 	}{
 		{
 			name:     "Shift",
 			numCtx:   2048,
 			numKeep:  5,
-			inputLen: 2048,
+			inputs:   slices.Repeat([]*input.Input{{}}, 2048),
 			expected: 1021,
 		},
 		{
 			name:     "Max Keep",
 			numCtx:   2048,
 			numKeep:  2047,
-			inputLen: 2048,
+			inputs:   slices.Repeat([]*input.Input{{}}, 2048),
 			expected: 1,
 		},
 		{
 			name:     "No Keep",
 			numCtx:   2048,
 			numKeep:  0,
-			inputLen: 2048,
+			inputs:   slices.Repeat([]*input.Input{{}}, 2048),
 			expected: 1024,
 		},
 		{
 			name:     "Truncate",
 			numCtx:   2048,
 			numKeep:  5,
-			inputLen: 5000,
+			inputs:   slices.Repeat([]*input.Input{{}}, 5000),
 			expected: 3973,
 		},
 		{
 			name:     "Truncate Keep",
 			numCtx:   2048,
 			numKeep:  2047,
-			inputLen: 5000,
+			inputs:   slices.Repeat([]*input.Input{{}}, 5000),
 			expected: 2953,
 		},
 		{
 			name:     "No Op",
 			numCtx:   2048,
 			numKeep:  5,
-			inputLen: 512,
+			inputs:   slices.Repeat([]*input.Input{{}}, 512),
 			expected: 0,
 		},
+		{
+			name:    "Same Batch",
+			numCtx:  2048,
+			numKeep: 5,
+			inputs: slices.Collect(func(yield func(*input.Input) bool) {
+				for range 1024 {
+					if !yield(&input.Input{}) {
+						return
+					}
+				}
+
+				if !yield(&input.Input{SameBatch: 512 - 1}) {
+					return
+				}
+
+				for range 2048 - 1024 - 1 {
+					if !yield(&input.Input{}) {
+						return
+					}
+				}
+			}),
+			expected: 1531,
+		},
+		{
+			name:    "Same Batch Near Start",
+			numCtx:  2048,
+			numKeep: 5,
+			inputs: slices.Collect(func(yield func(*input.Input) bool) {
+				for range 10 {
+					if !yield(&input.Input{}) {
+						return
+					}
+				}
+
+				if !yield(&input.Input{SameBatch: 512 - 1}) {
+					return
+				}
+
+				for range 2048 - 10 - 1 {
+					if !yield(&input.Input{}) {
+						return
+					}
+				}
+			}),
+			expected: 1021,
+		},
+		{
+			name:   "Consecutive Same Batch",
+			numCtx: 32,
+			inputs: slices.Collect(func(yield func(*input.Input) bool) {
+				for i := range 32 {
+					input := input.Input{}
+					if i%10 == 0 {
+						input.SameBatch = 10 - 1
+					}
+					if !yield(&input) {
+						return
+					}
+				}
+			}),
+			expected: 20,
+		},
+		{
+			name:   "Overlapping Same Batch",
+			numCtx: 32,
+			inputs: slices.Collect(func(yield func(*input.Input) bool) {
+				for i := range 32 {
+					input := input.Input{}
+					if slices.Contains([]int{4, 8, 14}, i) {
+						input.SameBatch = 10 - 1
+					}
+					if !yield(&input) {
+						return
+					}
+				}
+			}),
+			expected: 24,
+		},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			c := InputCache{numCtx: tt.numCtx}
-			result := c.ShiftDiscard(tt.inputLen, tt.numKeep)
+			result := c.ShiftDiscard(tt.inputs, tt.numKeep)
 			if result != tt.expected {
-				t.Errorf("shiftDiscard(ctx: %v, keep: %v input: %v): have %v; want %v", tt.numCtx, tt.numKeep, tt.inputLen, result, tt.expected)
+				t.Errorf("shiftDiscard(ctx: %v, keep: %v inputs: %v): have %v; want %v", tt.numCtx, tt.numKeep, len(tt.inputs), result, tt.expected)
 			}
 		})
 	}
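
A note on where the expected values come from. For "Shift": targetFree = max((2048 - 5) / 2, 1) = 1021 and currentFree = 2048 - 2048 = 0, so exactly 1021 inputs are discarded, matching the old closed-form behavior. For "Same Batch": the walk would satisfy targetFree after 1021 discards (ending at index 1025), but the input at index 1024 opened a SameBatch group of 512 spanning indices 1024-1535, so the walk runs on to the end of the group: 1021 + 510 = 1531. In "Overlapping Same Batch" the groups opened at indices 4, 8, and 14 chain together, since each new group resets the counter before the previous one expires, carrying the walk through index 23 for a total of 24 discards.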
@@ -214,7 +214,6 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]*input.Input,
 		parts = []string{prompt}
 	}
 
-	postTokenize := false
 	for i, part := range parts {
 		// text - tokenize
 		tokens, err := s.model.(model.TextProcessor).Encode(part, i == 0)
@@ -257,11 +256,10 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]*input.Input,
 			mmStore.addMultimodal(imageEmbeddings)
 
 			inputs = append(inputs, &input.Input{Multimodal: imageEmbeddings, MultimodalHash: imageHash})
-			postTokenize = true
 		}
 	}
 
-	if visionModel && postTokenize {
+	if visionModel {
 		var err error
 		inputs, err = multimodalProcessor.PostTokenize(inputs)
 		if err != nil {
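
With the postTokenize flag gone, PostTokenize now runs for every vision model rather than only when an image actually contributed multimodal inputs, so a processor can rewrite text-only prompts as well. Below is a hypothetical pass over a simplified Input type, purely to illustrate the kind of rewriting such a hook can do; the postTokenize name, the Input fields, and the marker token IDs are assumptions for this sketch, not qwen3vl's actual logic.

package main

import "fmt"

type Input struct {
	Token      int32
	Multimodal any
	SameBatch  int
}

// postTokenize wraps each multimodal chunk in begin/end marker tokens.
// Because it is invoked unconditionally for vision models, it is also free
// to rewrite purely textual sequences, which the removed flag used to skip.
func postTokenize(inputs []*Input) ([]*Input, error) {
	const beginTok, endTok = 1000, 1001 // illustrative marker token IDs
	out := make([]*Input, 0, len(inputs)+2)
	for _, in := range inputs {
		if in.Multimodal != nil {
			out = append(out, &Input{Token: beginTok}, in, &Input{Token: endTok})
			continue
		}
		out = append(out, in)
	}
	return out, nil
}

func main() {
	inputs := []*Input{{Token: 1}, {Multimodal: "img", SameBatch: 2}, {Token: 2}}
	out, _ := postTokenize(inputs)
	fmt.Println(len(out)) // 5: markers inserted around the image chunk
}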