mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 22:33:56 +00:00
renderers: add global flag for setting [img] tags (#12669)
Adds a temporary global flag to renderers that causes them to always render images as [img]. In a follow-up change, we will consider making this the default, and this flag could eventually be removed.
This commit is contained in:
@@ -48,13 +48,22 @@ func marshalWithSpaces(v any) ([]byte, error) {
|
|||||||
|
|
||||||
type Qwen3VLRenderer struct {
|
type Qwen3VLRenderer struct {
|
||||||
isThinking bool
|
isThinking bool
|
||||||
|
|
||||||
|
useImgTags bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Qwen3VLRenderer) renderContent(content api.Message, doVisionCount bool) string {
|
func (r *Qwen3VLRenderer) renderContent(content api.Message) string {
|
||||||
// This assumes all images are at the front of the message - same assumption as ollama/ollama/runner.go
|
// This assumes all images are at the front of the message - same assumption as ollama/ollama/runner.go
|
||||||
var subSb strings.Builder
|
var subSb strings.Builder
|
||||||
for range content.Images {
|
for range content.Images {
|
||||||
subSb.WriteString("<|vision_start|><|image_pad|><|vision_end|>")
|
// TODO: (jmorganca): how to render this is different for different
|
||||||
|
// model backends, and so we should eventually parameterize this or
|
||||||
|
// only output a placeholder such as [img]
|
||||||
|
if r.useImgTags {
|
||||||
|
subSb.WriteString("[img]")
|
||||||
|
} else {
|
||||||
|
subSb.WriteString("<|vision_start|><|image_pad|><|vision_end|>")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// TODO: support videos
|
// TODO: support videos
|
||||||
|
|
||||||
@@ -88,7 +97,7 @@ func (r *Qwen3VLRenderer) Render(messages []api.Message, tools []api.Tool, _ *ap
|
|||||||
message := messages[i]
|
message := messages[i]
|
||||||
if multiStepTool && message.Role == "user" {
|
if multiStepTool && message.Role == "user" {
|
||||||
// Check if content starts with <tool_response> and ends with </tool_response>
|
// Check if content starts with <tool_response> and ends with </tool_response>
|
||||||
content := r.renderContent(message, true)
|
content := r.renderContent(message)
|
||||||
if !(strings.HasPrefix(content, "<tool_response>") && strings.HasSuffix(content, "</tool_response>")) {
|
if !(strings.HasPrefix(content, "<tool_response>") && strings.HasSuffix(content, "</tool_response>")) {
|
||||||
multiStepTool = false
|
multiStepTool = false
|
||||||
lastQueryIndex = i
|
lastQueryIndex = i
|
||||||
@@ -97,7 +106,7 @@ func (r *Qwen3VLRenderer) Render(messages []api.Message, tools []api.Tool, _ *ap
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i, message := range messages {
|
for i, message := range messages {
|
||||||
content := r.renderContent(message, true)
|
content := r.renderContent(message)
|
||||||
|
|
||||||
lastMessage := i == len(messages)-1
|
lastMessage := i == len(messages)-1
|
||||||
prefill := lastMessage && message.Role == "assistant"
|
prefill := lastMessage && message.Role == "assistant"
|
||||||
|
|||||||
@@ -9,11 +9,12 @@ import (
|
|||||||
|
|
||||||
func TestQwen3VLNonThinkingRenderer(t *testing.T) {
|
func TestQwen3VLNonThinkingRenderer(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
msgs []api.Message
|
msgs []api.Message
|
||||||
images []api.ImageData
|
images []api.ImageData
|
||||||
tools []api.Tool
|
tools []api.Tool
|
||||||
expected string
|
useImgTags bool
|
||||||
|
expected string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "prefill",
|
name: "prefill",
|
||||||
@@ -90,6 +91,18 @@ I'll check the weather in San Francisco for you.<think>Speak poetry after the fi
|
|||||||
expected: `<|im_start|>user
|
expected: `<|im_start|>user
|
||||||
<|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>
|
<|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
|
Let me analyze this image.`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Image with image tags",
|
||||||
|
msgs: []api.Message{
|
||||||
|
{Role: "user", Content: "Describe this image.", Images: []api.ImageData{api.ImageData("img2")}},
|
||||||
|
{Role: "assistant", Content: "Let me analyze this image."},
|
||||||
|
},
|
||||||
|
useImgTags: true,
|
||||||
|
expected: `<|im_start|>user
|
||||||
|
[img]Describe this image.<|im_end|>
|
||||||
|
<|im_start|>assistant
|
||||||
Let me analyze this image.`,
|
Let me analyze this image.`,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -102,7 +115,18 @@ Let me analyze this image.`,
|
|||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
`,
|
`,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Multiple images with image tags",
|
||||||
|
msgs: []api.Message{
|
||||||
|
{Role: "user", Content: "Describe these images.", Images: []api.ImageData{api.ImageData("img1"), api.ImageData("img2")}},
|
||||||
|
{Role: "assistant", Content: "Let me analyze this image."},
|
||||||
|
},
|
||||||
|
useImgTags: true,
|
||||||
|
expected: `<|im_start|>user
|
||||||
|
[img][img]Describe these images.<|im_end|>
|
||||||
|
<|im_start|>assistant
|
||||||
|
Let me analyze this image.`,
|
||||||
|
},
|
||||||
// // NOTE: solved with #12518: https://github.com/ollama/ollama/compare/main...drifkin/stable-tool-args
|
// // NOTE: solved with #12518: https://github.com/ollama/ollama/compare/main...drifkin/stable-tool-args
|
||||||
// {
|
// {
|
||||||
// name: "with tools and response",
|
// name: "with tools and response",
|
||||||
@@ -485,7 +509,7 @@ I'll check.
|
|||||||
}
|
}
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
rendered, err := (&Qwen3VLRenderer{false}).Render(tt.msgs, tt.tools, nil)
|
rendered, err := (&Qwen3VLRenderer{isThinking: false, useImgTags: tt.useImgTags}).Render(tt.msgs, tt.tools, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -323,7 +323,7 @@ Speak poetry after the first sentence.</think><think>Speak poetry after the seco
|
|||||||
}
|
}
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
rendered, err := (&Qwen3VLRenderer{true}).Render(tt.msgs, tt.tools, nil)
|
rendered, err := (&Qwen3VLRenderer{isThinking: true}).Render(tt.msgs, tt.tools, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,11 @@ type (
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// RenderImgTags is a global flag that tells renderers to use [img] tags
|
||||||
|
// for images. This is set by the Ollama server package on init, or left as
|
||||||
|
// false for other environments where renderers are used
|
||||||
|
var RenderImgTags bool
|
||||||
|
|
||||||
func (r *RendererRegistry) Register(name string, renderer RendererConstructor) {
|
func (r *RendererRegistry) Register(name string, renderer RendererConstructor) {
|
||||||
r.renderers[name] = renderer
|
r.renderers[name] = renderer
|
||||||
}
|
}
|
||||||
@@ -46,10 +51,10 @@ func rendererForName(name string) Renderer {
|
|||||||
renderer := &Qwen3CoderRenderer{}
|
renderer := &Qwen3CoderRenderer{}
|
||||||
return renderer
|
return renderer
|
||||||
case "qwen3-vl-instruct":
|
case "qwen3-vl-instruct":
|
||||||
renderer := &Qwen3VLRenderer{false}
|
renderer := &Qwen3VLRenderer{isThinking: false, useImgTags: RenderImgTags}
|
||||||
return renderer
|
return renderer
|
||||||
case "qwen3-vl-thinking":
|
case "qwen3-vl-thinking":
|
||||||
renderer := &Qwen3VLRenderer{true}
|
renderer := &Qwen3VLRenderer{isThinking: true, useImgTags: RenderImgTags}
|
||||||
return renderer
|
return renderer
|
||||||
default:
|
default:
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ import (
|
|||||||
"github.com/ollama/ollama/logutil"
|
"github.com/ollama/ollama/logutil"
|
||||||
"github.com/ollama/ollama/middleware"
|
"github.com/ollama/ollama/middleware"
|
||||||
"github.com/ollama/ollama/model/parsers"
|
"github.com/ollama/ollama/model/parsers"
|
||||||
|
"github.com/ollama/ollama/model/renderers"
|
||||||
"github.com/ollama/ollama/server/internal/client/ollama"
|
"github.com/ollama/ollama/server/internal/client/ollama"
|
||||||
"github.com/ollama/ollama/server/internal/registry"
|
"github.com/ollama/ollama/server/internal/registry"
|
||||||
"github.com/ollama/ollama/template"
|
"github.com/ollama/ollama/template"
|
||||||
@@ -91,6 +92,9 @@ func init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
gin.SetMode(mode)
|
gin.SetMode(mode)
|
||||||
|
|
||||||
|
// Tell renderers to use [img] tags
|
||||||
|
renderers.RenderImgTags = true
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|||||||
Reference in New Issue
Block a user