Qwen3VL Cloud Parser and Renderer (#12526)

* working for tool calls and tools (other than tool calls being in the incorrect order)

* Tests work, other than image tags (tests do not go through server) and tools (not in the correct order, but contents are the same)

* testing for qwen3vl parser - toolparser is working

* made changes to JSON tool parser, wraps the ToolCallFunction with a ToolCall object

* Working parser for thinking models - assumes state of thinking, emits unambiguous content in thinking, does not call tool call in thinking

* changed the parser to start with collecting content

* thinking prefill

* add hasThinkingSupport parameter to parser

* qwen3-vl -> qwen3-vl-instruct for renderer/parser

* Add hasThinkingSupport=false to QwenVLParser

---------

Co-authored-by: Devon Rifkin <drifkin@drifkin.net>
This commit is contained in:
Grace
2025-10-13 16:52:33 -07:00
committed by GitHub
parent 4987f13d34
commit 05982a95cb
16 changed files with 2654 additions and 22 deletions

View File

@@ -55,7 +55,9 @@ func renderAdditionalKeys(obj any, handledKeys map[string]bool) string {
return sb.String()
}
func Qwen3CoderRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
type Qwen3CoderRenderer struct{}
func (r *Qwen3CoderRenderer) Render(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
var sb strings.Builder
// filter out system messages and choose the first (if any) to win

View File

@@ -288,7 +288,7 @@ call tool<|im_end|>
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rendered, err := Qwen3CoderRenderer(tt.msgs, tt.tools, nil)
rendered, err := (&Qwen3CoderRenderer{}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}

166
model/renderers/qwen3vl.go Normal file
View File

@@ -0,0 +1,166 @@
package renderers
import (
"encoding/json"
"strings"
"github.com/ollama/ollama/api"
)
// marshalWithSpaces encodes v with json.Marshal and then inserts one space
// after every ':' and ',' that occurs outside a JSON string literal, turning
// {"a":1,"b":2} into {"a": 1, "b": 2}. Bytes inside string literals (including
// escaped quotes and backslashes) are copied through untouched.
//
// It returns the error from json.Marshal, with a nil slice, when v cannot be
// encoded.
func marshalWithSpaces(v any) ([]byte, error) {
	raw, err := json.Marshal(v)
	if err != nil {
		return nil, err
	}

	// Reserve a little headroom for the spaces that will be inserted.
	spaced := make([]byte, 0, len(raw)+len(raw)/8)

	insideString := false // currently between an opening and closing '"'
	escaped := false      // previous byte inside the string was an unconsumed '\'
	for i := 0; i < len(raw); i++ {
		ch := raw[i]
		spaced = append(spaced, ch)

		switch {
		case !insideString:
			switch ch {
			case '"':
				insideString = true
			case ':', ',':
				spaced = append(spaced, ' ')
			}
		case escaped:
			// This byte is the target of a backslash escape; it can neither
			// end the string nor start another escape.
			escaped = false
		case ch == '\\':
			escaped = true
		case ch == '"':
			insideString = false
		}
	}
	return spaced, nil
}
// Qwen3VLRenderer renders chat messages into a Qwen3-VL style prompt made of
// <|im_start|>role ... <|im_end|> turns.
type Qwen3VLRenderer struct {
	// isThinking selects the thinking variant of the prompt: when true, Render
	// wraps assistant reasoning in <think> blocks and opens a <think> block in
	// the trailing generation prompt.
	isThinking bool
}
// renderContent returns the text body of a message: one
// "<|vision_start|><|image_pad|><|vision_end|>" placeholder per entry in
// content.Images, followed by content.Content.
//
// doVisionCount is accepted but not consulted by this implementation.
func (r *Qwen3VLRenderer) renderContent(content api.Message, doVisionCount bool) string {
	// Placeholders always precede the text, i.e. this assumes all images are at
	// the front of the message - same assumption as ollama/ollama/runner.go
	// TODO: support videos
	const imagePlaceholder = "<|vision_start|><|image_pad|><|vision_end|>"
	return strings.Repeat(imagePlaceholder, len(content.Images)) + content.Content
}
// Render converts a chat transcript into a Qwen3-VL prompt string.
//
// The output consists of, in order:
//   - an optional system block. When tools are supplied it contains the
//     leading system message (if messages[0] has role "system"), the
//     tool-calling instructions and one JSON signature per tool. Without tools
//     it contains only the leading system message, if there is one.
//   - each message wrapped as <|im_start|>role ... <|im_end|>. Consecutive
//     "tool" messages are merged into a single user turn made of
//     <tool_response> blocks, and assistant tool calls are written as
//     <tool_call> blocks.
//   - a trailing "<|im_start|>assistant\n" generation prompt (followed by
//     "<think>\n" when r.isThinking), unless the last message is an assistant
//     message, in which case that message is left open (no <|im_end|>) so the
//     model continues it.
//
// The *api.ThinkValue argument is ignored; thinking output is governed by
// r.isThinking. An error is returned when a tool definition or the arguments
// of a tool call cannot be encoded as JSON.
func (r *Qwen3VLRenderer) Render(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
	var sb strings.Builder

	hasLeadingSystem := len(messages) > 0 && messages[0].Role == "system"
	if len(tools) > 0 {
		sb.WriteString(imStartTag + "system\n")
		if hasLeadingSystem {
			sb.WriteString(messages[0].Content + "\n\n")
		}
		sb.WriteString("# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>")
		for _, tool := range tools {
			// Surface encoding failures instead of silently emitting a prompt
			// with a missing tool signature.
			b, err := marshalWithSpaces(tool)
			if err != nil {
				return "", err
			}
			sb.WriteString("\n")
			sb.Write(b)
		}
		sb.WriteString("\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n")
	} else if hasLeadingSystem {
		sb.WriteString("<|im_start|>system\n" + messages[0].Content + "<|im_end|>\n")
	}

	// Find the last "real" user query: scanning backwards, the first user
	// message that is not entirely a <tool_response>...</tool_response> block.
	// Reasoning is only rendered for assistant messages after this index.
	lastQueryIndex := len(messages) - 1
	for i := len(messages) - 1; i >= 0; i-- {
		if messages[i].Role != "user" {
			continue
		}
		content := r.renderContent(messages[i], true)
		if !(strings.HasPrefix(content, "<tool_response>") && strings.HasSuffix(content, "</tool_response>")) {
			lastQueryIndex = i
			break
		}
	}

	for i, message := range messages {
		content := r.renderContent(message, true)
		lastMessage := i == len(messages)-1
		// A trailing assistant message is a prefill: it is left open so the
		// model continues generating from it.
		prefill := lastMessage && message.Role == "assistant"

		switch {
		case message.Role == "user", message.Role == "system" && i != 0:
			// The system message at index 0 was already emitted above.
			sb.WriteString("<|im_start|>" + message.Role + "\n" + content + "<|im_end|>\n")

		case message.Role == "assistant":
			sb.WriteString("<|im_start|>" + message.Role + "\n")

			reasoning := ""
			if r.isThinking {
				reasoning = message.Thinking
			}

			if r.isThinking && i > lastQueryIndex && (lastMessage || reasoning != "") {
				sb.WriteString("<think>\n" + strings.Trim(reasoning, "\n"))
				// Leave the think block open only for a prefill that has
				// neither content nor tool calls, so the model can continue
				// reasoning. In every other case it must be closed; otherwise
				// the tool calls or <|im_end|> that follow would be rendered
				// inside the reasoning block.
				if content != "" || len(message.ToolCalls) > 0 || !prefill {
					sb.WriteString("\n</think>\n\n" + strings.TrimLeft(content, "\n"))
				}
			} else {
				sb.WriteString(content)
			}

			for j, toolCall := range message.ToolCalls {
				args, err := marshalWithSpaces(toolCall.Function.Arguments)
				if err != nil {
					return "", err
				}
				// Separate tool calls from preceding content and from each other.
				if j > 0 || content != "" {
					sb.WriteString("\n")
				}
				sb.WriteString("<tool_call>\n{\"name\": \"" + toolCall.Function.Name + "\", \"arguments\": ")
				sb.Write(args)
				sb.WriteString("}\n</tool_call>")
			}

			if !prefill {
				sb.WriteString("<|im_end|>\n")
			}

		case message.Role == "tool":
			// Open a user turn on the first tool message of a run and close it
			// after the last one.
			if i == 0 || messages[i-1].Role != "tool" {
				sb.WriteString("<|im_start|>user")
			}
			sb.WriteString("\n<tool_response>\n" + message.Content + "\n</tool_response>")
			if lastMessage || messages[i+1].Role != "tool" {
				sb.WriteString("<|im_end|>\n")
			}
		}

		// Generation prompt at the end, unless an assistant prefill was left open.
		if lastMessage && !prefill {
			sb.WriteString("<|im_start|>assistant\n")
			if r.isThinking {
				sb.WriteString("<think>\n")
			}
		}
	}

	return sb.String(), nil
}

View File

@@ -0,0 +1,497 @@
package renderers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
// TestQwen3VLNonThinkingRenderer table-tests Qwen3VLRenderer.Render with
// isThinking disabled. Each case renders tt.msgs and tt.tools and compares the
// resulting prompt with tt.expected.
func TestQwen3VLNonThinkingRenderer(t *testing.T) {
tests := []struct {
name string
msgs []api.Message
// images is not read by the test loop below; images are supplied through
// api.Message.Images in the individual cases.
images []api.ImageData
tools []api.Tool
expected string
}{
{
name: "prefill",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Tell me something interesting."},
{Role: "assistant", Content: "I'll tell you something interesting about cats"},
},
expected: `<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Tell me something interesting.<|im_end|>
<|im_start|>assistant
I'll tell you something interesting about cats`,
},
{
name: "basic",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello, how are you?"},
},
expected: `<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Hello, how are you?<|im_end|>
<|im_start|>assistant
`,
},
{
name: "With thinking, end assistant.",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think>"}, // does the thinking even work?
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think>`,
},
{
name: "Multiple thinking",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>`, // NOTE: the second thinking tag is not captured
},
{
name: "Multiple thinking, multiple messages.",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
{Role: "user", Content: "What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think>"},
{Role: "assistant", Content: "I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think>"},
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think><|im_end|>
<|im_start|>user
What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think><|im_end|>
<|im_start|>assistant
I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think>`,
},
{
name: "Image",
msgs: []api.Message{
{Role: "user", Content: "Describe this image.", Images: []api.ImageData{api.ImageData("img2")}},
{Role: "assistant", Content: "Let me analyze this image."},
},
expected: `<|im_start|>user
<|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>
<|im_start|>assistant
Let me analyze this image.`,
},
{
name: "Multiple images",
msgs: []api.Message{
{Role: "user", Content: "Describe these images.", Images: []api.ImageData{api.ImageData("img1"), api.ImageData("img2")}},
},
expected: `<|im_start|>user
<|vision_start|><|image_pad|><|vision_end|><|vision_start|><|image_pad|><|vision_end|>Describe these images.<|im_end|>
<|im_start|>assistant
`,
},
// // NOTE: solved with #12518: https://github.com/ollama/ollama/compare/main...drifkin/stable-tool-args
// {
// name: "with tools and response",
// msgs: []api.Message{
// {Role: "system", Content: "You are a helpful assistant with access to tools."},
// {Role: "user", Content: "What's the weather like in New York?"},
// {
// Role: "assistant",
// Content: "I'll check the weather in New York for you.",
// ToolCalls: []api.ToolCall{
// {
// Function: api.ToolCallFunction{
// Name: "get-current-weather",
// Arguments: map[string]any{
// "location": "New York",
// "unit": "fahrenheit",
// },
// },
// },
// },
// },
// {Role: "tool", Content: "80", ToolName: "get-current-weather"},
// {Role: "user", Content: "That sounds nice! What about San Francisco?"},
// },
// tools: []api.Tool{
// {
// Type: "function",
// Function: api.ToolFunction{
// Name: "get-current-weather",
// Description: "Get the current weather for a location",
// Parameters: api.ToolFunctionParameters{
// Type: "object",
// Required: []string{"location"},
// Properties: map[string]api.ToolProperty{
// "location": {
// Type: api.PropertyType{"string"},
// Description: "The city and state, e.g. San Francisco, CA",
// },
// "unit": {
// Type: api.PropertyType{"string"},
// Enum: []any{"celsius", "fahrenheit"},
// Description: "The temperature unit",
// },
// },
// },
// },
// },
// },
// expected: `<|im_start|>system
// You are a helpful assistant with access to tools.
// # Tools
// You may call one or more functions to assist with the user query.
// You are provided with function signatures within <tools></tools> XML tags:
// <tools>
// {"type": "function", "function": {"name": "get-current-weather", "description": "Get the current weather for a location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit"}}, "required": ["location"]}}}
// </tools>
// For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
// <tool_call>
// {"name": <function-name>, "arguments": <args-json-object>}
// </tool_call><|im_end|>
// <|im_start|>user
// What's the weather like in New York?<|im_end|>
// <|im_start|>assistant
// I'll check the weather in New York for you.
// <tool_call>
// {"name": "get-current-weather", "arguments": {"location": "New York", "unit": "fahrenheit"}}
// </tool_call><|im_end|>
// <|im_start|>user
// <tool_response>
// 80
// </tool_response><|im_end|>
// <|im_start|>user
// That sounds nice! What about San Francisco?<|im_end|>
// <|im_start|>assistant
// `,
// },
// // NOTE: solved with #12518: https://github.com/ollama/ollama/compare/main...drifkin/stable-tool-args
// {
// name: "With tools and response, multiple tool calls",
// msgs: []api.Message{
// {
// Role: "system",
// Content: "You are a helpful assistant with access to tools.",
// },
// {
// Role: "user",
// Content: "Call two tools for me: add and multiply.",
// },
// {
// Role: "assistant",
// Content: "Sure, I'll call both tools for you.",
// ToolCalls: []api.ToolCall{
// {
// Function: api.ToolCallFunction{
// Name: "add",
// Arguments: map[string]any{
// "a": 2,
// "b": 3,
// },
// },
// },
// {
// Function: api.ToolCallFunction{
// Name: "multiply",
// Arguments: map[string]any{
// "x": 4,
// "y": 5,
// },
// },
// },
// },
// },
// {
// Role: "tool",
// Content: "5",
// ToolName: "add",
// },
// {
// Role: "tool",
// Content: "20",
// ToolName: "multiply",
// },
// {
// Role: "user",
// Content: "Thanks! What are the results?",
// },
// },
// tools: []api.Tool{
// {
// Type: "function",
// Function: api.ToolFunction{
// Name: "add",
// Description: "Add two numbers",
// Parameters: api.ToolFunctionParameters{
// Type: "object",
// Required: []string{"a", "b"},
// Properties: map[string]api.ToolProperty{
// "a": {Type: api.PropertyType{"integer"}, Description: "First number"},
// "b": {Type: api.PropertyType{"integer"}, Description: "Second number"},
// },
// },
// },
// },
// {
// Type: "function",
// Function: api.ToolFunction{
// Name: "multiply",
// Description: "Multiply two numbers",
// Parameters: api.ToolFunctionParameters{
// Type: "object",
// Required: []string{"x", "y"},
// Properties: map[string]api.ToolProperty{
// "x": {Type: api.PropertyType{"integer"}, Description: "First factor"},
// "y": {Type: api.PropertyType{"integer"}, Description: "Second factor"},
// },
// },
// },
// },
// },
// expected: `<|im_start|>system
// You are a helpful assistant with access to tools.
// # Tools
// You may call one or more functions to assist with the user query.
// You are provided with function signatures within <tools></tools> XML tags:
// <tools>
// {"type": "function", "function": {"name": "add", "description": "Add two numbers", "parameters": {"type": "object", "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}}, "required": ["a", "b"]}}}
// {"type": "function", "function": {"name": "multiply", "description": "Multiply two numbers", "parameters": {"type": "object", "properties": {"x": {"description": "First factor"}, "y": {"description": "Second factor"}}, "required": ["x", "y"]}}}
// </tools>
// For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
// <tool_call>
// {"name": <function-name>, "arguments": <args-json-object>}
// </tool_call><|im_end|>
// <|im_start|>user
// Call two tools for me: add and multiply.<|im_end|>
// <|im_start|>assistant
// Sure, I'll call both tools for you.
// <tool_call>
// {"name": "add", "arguments": {"a": 2, "b": 3}}
// </tool_call>
// <tool_call>
// {"name": "multiply", "arguments": {"x": 4, "y": 5}}
// </tool_call><|im_end|>
// <|im_start|>user
// <tool_response>
// 5
// </tool_response>
// <tool_response>
// 20
// </tool_response><|im_end|>
// <|im_start|>user
// Thanks! What are the results?<|im_end|>
// <|im_start|>assistant
// `,
// },
{
name: "user tool_response block preserved",
msgs: []api.Message{
{Role: "user", Content: "What's the weather?"},
{
Role: "assistant",
Content: "I'll check.",
ToolCalls: []api.ToolCall{
{Function: api.ToolCallFunction{Name: "get-current-weather", Arguments: map[string]any{"location": "Paris", "unit": "celsius"}}},
},
},
{Role: "user", Content: "<tool_response>\n18\n</tool_response>"},
{Role: "user", Content: "Thanks!"},
},
expected: `<|im_start|>user
What's the weather?<|im_end|>
<|im_start|>assistant
I'll check.
<tool_call>
{"name": "get-current-weather", "arguments": {"location": "Paris", "unit": "celsius"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
18
</tool_response><|im_end|>
<|im_start|>user
Thanks!<|im_end|>
<|im_start|>assistant
`,
},
{
name: "assistant with multiple tool calls and content",
msgs: []api.Message{
{Role: "user", Content: "Hi"},
{
Role: "assistant",
Content: "before",
ToolCalls: []api.ToolCall{
{Function: api.ToolCallFunction{Name: "add", Arguments: map[string]any{"a": 2, "b": 3}}},
{Function: api.ToolCallFunction{Name: "mul", Arguments: map[string]any{"x": 4, "y": 5}}},
},
},
},
expected: `<|im_start|>user
Hi<|im_end|>
<|im_start|>assistant
before
<tool_call>
{"name": "add", "arguments": {"a": 2, "b": 3}}
</tool_call>
<tool_call>
{"name": "mul", "arguments": {"x": 4, "y": 5}}
</tool_call>`,
},
{
name: "consecutive tool responses grouped",
msgs: []api.Message{
{Role: "user", Content: "Compute results"},
{Role: "assistant", Content: "ok", ToolCalls: []api.ToolCall{{Function: api.ToolCallFunction{Name: "job", Arguments: map[string]any{"n": 1}}}}},
{Role: "tool", Content: "5", ToolName: "job"},
{Role: "tool", Content: "6", ToolName: "job"},
},
expected: `<|im_start|>user
Compute results<|im_end|>
<|im_start|>assistant
ok
<tool_call>
{"name": "job", "arguments": {"n": 1}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
5
</tool_response>
<tool_response>
6
</tool_response><|im_end|>
<|im_start|>assistant
`,
},
{
name: "last message is tool then prefill",
msgs: []api.Message{
{Role: "user", Content: "run"},
{Role: "assistant", Content: "ok", ToolCalls: []api.ToolCall{{Function: api.ToolCallFunction{Name: "exec", Arguments: map[string]any{"cmd": "ls"}}}}},
{Role: "tool", Content: "done", ToolName: "exec"},
},
expected: `<|im_start|>user
run<|im_end|>
<|im_start|>assistant
ok
<tool_call>
{"name": "exec", "arguments": {"cmd": "ls"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
done
</tool_response><|im_end|>
<|im_start|>assistant
`,
},
{
name: "user with multiple images",
msgs: []api.Message{
{Role: "user", Content: "Describe.", Images: []api.ImageData{api.ImageData("img1"), api.ImageData("img2")}},
},
expected: `<|im_start|>user
<|vision_start|><|image_pad|><|vision_end|><|vision_start|><|image_pad|><|vision_end|>Describe.<|im_end|>
<|im_start|>assistant
`,
},
{
name: "user tool_response, no whitespace",
msgs: []api.Message{
{Role: "user", Content: "What's the weather?"},
{
Role: "assistant",
Content: "I'll check.",
ToolCalls: []api.ToolCall{
{Function: api.ToolCallFunction{Name: "get-current-weather", Arguments: map[string]any{"location": "Paris", "unit": "celsius"}}},
},
},
{Role: "user", Content: "<tool_response>\n18\n</tool_response>"},
{Role: "user", Content: "Thanks!"},
},
expected: `<|im_start|>user
What's the weather?<|im_end|>
<|im_start|>assistant
I'll check.
<tool_call>
{"name": "get-current-weather", "arguments": {"location": "Paris", "unit": "celsius"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
18
</tool_response><|im_end|>
<|im_start|>user
Thanks!<|im_end|>
<|im_start|>assistant
`,
},
{
name: "user tool_response with surrounding whitespace",
msgs: []api.Message{
{Role: "user", Content: "What's the weather?"},
{
Role: "assistant",
Content: "I'll check.",
ToolCalls: []api.ToolCall{
{Function: api.ToolCallFunction{Name: "get-current-weather", Arguments: map[string]any{"location": "Paris", "unit": "celsius"}}},
},
},
{Role: "user", Content: "\n\n\n\n<tool_response>\n18\n</tool_response> extra\n\n\n\n\n\n"},
},
expected: `<|im_start|>user
What's the weather?<|im_end|>
<|im_start|>assistant
I'll check.
<tool_call>
{"name": "get-current-weather", "arguments": {"location": "Paris", "unit": "celsius"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
18
</tool_response> extra
<|im_end|>
<|im_start|>assistant
`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// The positional false sets isThinking, selecting the non-thinking renderer.
rendered, err := (&Qwen3VLRenderer{false}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}
// A non-empty diff means the rendered prompt differs from the expectation.
if diff := cmp.Diff(rendered, tt.expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
}

View File

@@ -0,0 +1,346 @@
package renderers
import (
"testing"
"github.com/google/go-cmp/cmp"
)
// TestMarshalWithSpaces table-tests marshalWithSpaces: each input is encoded
// and the resulting bytes, converted to a string, are compared with the
// expected JSON text. The cases cover spacing after ':' and ',', and check that
// string contents (escapes, unicode, structural characters) pass through
// without added spaces.
//
// TODO(drifkin): this will be moved to utils in the near future and used by other renderers as well
func TestMarshalWithSpaces(t *testing.T) {
tests := []struct {
name string
input any
expected string
}{
// basic formatting tests
{
name: "simple object",
input: map[string]any{"key": "value"},
expected: `{"key": "value"}`,
},
{
name: "simple array",
input: []any{"a", "b", "c"},
expected: `["a", "b", "c"]`,
},
// escaped quotes
{
name: "escaped quote in string",
input: map[string]any{"text": `quote"inside`},
expected: `{"text": "quote\"inside"}`,
},
{
name: "multiple escaped quotes",
input: map[string]any{"text": `say "hello" and "goodbye"`},
expected: `{"text": "say \"hello\" and \"goodbye\""}`,
},
// escaped backslashes
{
name: "escaped backslash",
input: map[string]any{"path": `C:\windows\system32`},
expected: `{"path": "C:\\windows\\system32"}`,
},
{
name: "double backslash",
input: map[string]any{"text": `test\\more`},
expected: `{"text": "test\\\\more"}`,
},
{
name: "backslash before quote",
input: map[string]any{"text": `end with \"`},
expected: `{"text": "end with \\\""}`,
},
// standard JSON escape sequences
{
name: "newline in string",
input: map[string]any{"text": "line1\nline2"},
expected: `{"text": "line1\nline2"}`,
},
{
name: "tab in string",
input: map[string]any{"text": "before\tafter"},
expected: `{"text": "before\tafter"}`,
},
{
name: "carriage return",
input: map[string]any{"text": "before\rafter"},
expected: `{"text": "before\rafter"}`,
},
{
name: "multiple escape sequences",
input: map[string]any{"text": "line1\nline2\ttab\rcarriage"},
expected: `{"text": "line1\nline2\ttab\rcarriage"}`,
},
// strings containing colons and commas (no spaces should be added inside)
{
name: "colon in string",
input: map[string]any{"url": "http://example.com"},
expected: `{"url": "http://example.com"}`,
},
{
name: "comma in string",
input: map[string]any{"list": "apple, banana, cherry"},
expected: `{"list": "apple, banana, cherry"}`,
},
{
name: "colon and comma in string",
input: map[string]any{"data": "key:value, key2:value2"},
expected: `{"data": "key:value, key2:value2"}`,
},
// unicode characters
{
name: "emoji",
input: map[string]any{"emoji": "😀🎉✨"},
expected: `{"emoji": "😀🎉✨"}`,
},
{
name: "chinese characters",
input: map[string]any{"text": "你好世界"},
expected: `{"text": "你好世界"}`,
},
{
name: "arabic characters",
input: map[string]any{"text": "مرحبا"},
expected: `{"text": "مرحبا"}`,
},
{
name: "mixed unicode and ascii",
input: map[string]any{"text": "Hello 世界! 😀"},
expected: `{"text": "Hello 世界! 😀"}`,
},
{
name: "unicode with special symbols",
input: map[string]any{"text": "®©™€£¥"},
expected: `{"text": "®©™€£¥"}`,
},
// complex combinations - strings that look like JSON
{
name: "json string inside value",
input: map[string]any{"nested": `{"key":"value"}`},
expected: `{"nested": "{\"key\":\"value\"}"}`,
},
{
name: "json array inside value",
input: map[string]any{"array": `["a","b","c"]`},
expected: `{"array": "[\"a\",\"b\",\"c\"]"}`,
},
// edge cases
{
name: "empty string",
input: map[string]any{"empty": ""},
expected: `{"empty": ""}`,
},
{
name: "empty object",
input: map[string]any{},
expected: `{}`,
},
{
name: "empty array",
input: []any{},
expected: `[]`,
},
{
name: "numbers",
input: map[string]any{"int": 42, "float": 3.14},
expected: `{"float": 3.14, "int": 42}`,
},
{
name: "boolean",
input: map[string]any{"bool": true, "other": false},
expected: `{"bool": true, "other": false}`,
},
{
name: "null value",
input: map[string]any{"value": nil},
expected: `{"value": null}`,
},
// nested structures with complex strings
{
name: "nested object with escapes",
input: map[string]any{
"outer": map[string]any{
"path": `C:\folder\file.txt`,
"quote": `He said "hi"`,
},
},
expected: `{"outer": {"path": "C:\\folder\\file.txt", "quote": "He said \"hi\""}}`,
},
{
name: "array with unicode and escapes",
input: []any{
"normal",
"with\nnewline",
"with\"quote",
"emoji😀",
"colon:comma,",
},
expected: `["normal", "with\nnewline", "with\"quote", "emoji😀", "colon:comma,"]`,
},
{
name: "backslash at positions before special chars",
input: map[string]any{"text": `a\b:c\d,e`},
expected: `{"text": "a\\b:c\\d,e"}`,
},
{
name: "multiple backslashes before quote",
input: map[string]any{"text": `ends\\"`},
expected: `{"text": "ends\\\\\""}`,
},
{
name: "unicode with escapes",
input: map[string]any{"text": "Hello\n世界\t😀"},
expected: `{"text": "Hello\n世界\t😀"}`,
},
// Real-world tool call example
{
name: "tool call arguments",
input: map[string]any{
"location": "San Francisco, CA",
"unit": "fahrenheit",
"format": "json",
},
expected: `{"format": "json", "location": "San Francisco, CA", "unit": "fahrenheit"}`,
},
{
name: "complex tool arguments with escapes",
input: map[string]any{
"query": `SELECT * FROM "users" WHERE name = 'O'Brien'`,
"description": "Fetch user\ndata from DB",
"path": `C:\data\users.db`,
},
expected: `{"description": "Fetch user\ndata from DB", "path": "C:\\data\\users.db", "query": "SELECT * FROM \"users\" WHERE name = 'O'Brien'"}`,
},
{
name: "unicode immediately adjacent to JSON structure chars",
input: map[string]any{"😀key": "😀value", "test": "😀:😀,😀"},
expected: `{"test": "😀:😀,😀", "😀key": "😀value"}`,
},
{
name: "long unicode string stress test",
input: map[string]any{"text": "😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓😔😕😖😗😘😙😚😛😜😝😞😟"},
expected: `{"text": "😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓😔😕😖😗😘😙😚😛😜😝😞😟"}`,
},
{
name: "deeply nested with unicode everywhere",
input: map[string]any{
"😀": map[string]any{
"你好": []any{"مرحبا", "®©™", "∑∫∂√"},
},
},
expected: `{"😀": {"你好": ["مرحبا", "®©™", "∑∫∂√"]}}`,
},
{
name: "unicode with all JSON special chars interleaved",
input: map[string]any{"k😀:k": "v😀,v", "a:😀": "b,😀", "😀": ":,😀,:"},
expected: `{"a:😀": "b,😀", "k😀:k": "v😀,v", "😀": ":,😀,:"}`,
},
{
name: "combining diacritics and RTL text",
input: map[string]any{"hebrew": "עִבְרִית", "combined": "é̀ñ", "mixed": "test:עִבְרִית,é̀ñ"},
expected: `{"combined": "é̀ñ", "hebrew": "עִבְרִית", "mixed": "test:עִבְרִית,é̀ñ"}`,
},
{
name: "pathological case: unicode + escapes + special chars",
input: map[string]any{"😀": "test\n😀\"quote😀\\backslash😀:colon😀,comma😀"},
expected: `{"😀": "test\n😀\"quote😀\\backslash😀:colon😀,comma😀"}`,
},
// all JSON structural characters inside strings
{
name: "braces and brackets in strings",
input: map[string]any{"text": "test{with}braces[and]brackets"},
expected: `{"text": "test{with}braces[and]brackets"}`,
},
{
name: "braces and brackets with colons and commas",
input: map[string]any{"code": "{key:value,[1,2,3]}"},
expected: `{"code": "{key:value,[1,2,3]}"}`,
},
{
name: "json-like string with all structural chars",
input: map[string]any{"schema": `{"type":"object","properties":{"name":{"type":"string"},"items":{"type":"array"}}}`},
expected: `{"schema": "{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"},\"items\":{\"type\":\"array\"}}}"}`,
},
// forward slash tests (JSON allows \/ as an escape sequence)
{
name: "forward slash in URL",
input: map[string]any{"url": "https://example.com/path/to/resource"},
expected: `{"url": "https://example.com/path/to/resource"}`,
},
{
name: "regex pattern with slashes",
input: map[string]any{"regex": "/[a-z]+/gi"},
expected: `{"regex": "/[a-z]+/gi"}`,
},
// all JSON escape sequences
{
name: "backspace escape",
input: map[string]any{"text": "before\bafter"},
expected: `{"text": "before\bafter"}`,
},
{
name: "form feed escape",
input: map[string]any{"text": "before\fafter"},
expected: `{"text": "before\fafter"}`,
},
{
name: "all standard escapes combined",
input: map[string]any{"text": "\"\\\b\f\n\r\t"},
expected: `{"text": "\"\\\b\f\n\r\t"}`,
},
// unicode escape sequences
{
name: "string that forces unicode escapes",
input: map[string]any{"control": "\u0000\u0001\u001f"},
expected: `{"control": "\u0000\u0001\u001f"}`,
},
// empty objects and arrays nested with strings
{
name: "nested empty structures with string values",
input: map[string]any{"empty_obj": map[string]any{}, "empty_arr": []any{}, "text": "{}[]"},
expected: `{"empty_arr": [], "empty_obj": {}, "text": "{}[]"}`,
},
// complex nesting with all structural characters
{
name: "deeply nested with all char types",
input: map[string]any{
"level1": map[string]any{
"array": []any{
map[string]any{"nested": "value:with,special{chars}[here]"},
[]any{"a", "b", "c"},
},
},
},
expected: `{"level1": {"array": [{"nested": "value:with,special{chars}[here]"}, ["a", "b", "c"]]}}`,
},
// string containing escaped structural characters
{
name: "string with multiple escape sequences and structural chars",
input: map[string]any{"data": "test\"quote\"{brace}[bracket]:colon,comma\\backslash/slash"},
expected: `{"data": "test\"quote\"{brace}[bracket]:colon,comma\\backslash/slash"}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := marshalWithSpaces(tt.input)
if err != nil {
t.Fatalf("marshalWithSpaces failed: %v", err)
}
// Compare as a string so a mismatch is reported as a readable text diff.
resultStr := string(result)
if diff := cmp.Diff(resultStr, tt.expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
}

View File

@@ -0,0 +1,372 @@
package renderers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
func TestQwen3VLThinkingRenderer(t *testing.T) {
tests := []struct {
name string
msgs []api.Message
images []api.ImageData
tools []api.Tool
expected string
}{
{
name: "basic",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello, how are you?"},
},
expected: `<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Hello, how are you?<|im_end|>
<|im_start|>assistant
<think>
`,
},
{
name: "With thinking, end assistant.",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc", Thinking: "To make this story interesting, I will speak in poetry."},
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
<think>
To make this story interesting, I will speak in poetry.
</think>
abc`,
},
{
name: "With thinking, end assistant.",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Thinking: "To make this story interesting, I will speak in poetry."},
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
<think>
To make this story interesting, I will speak in poetry.`,
},
{
name: "Multiple thinking",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc", Thinking: "To make this story interesting, I will speak in poetry.<think>And I will speak in poetry after the first sentence.</think>"},
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
<think>
To make this story interesting, I will speak in poetry.<think>And I will speak in poetry after the first sentence.</think>
</think>
abc`, // NOTE: the second thinking tag is not captured
},
{
name: "Multiple thinking, multiple messages.",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Thinking: "To make this story interesting, I will speak in poetry.", Content: "abc"},
{Role: "user", Content: "What is the weather like in San Francisco?"},
{Role: "assistant", Thinking: "Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence."},
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<|im_end|>
<|im_start|>user
What is the weather like in San Francisco?<|im_end|>
<|im_start|>assistant
<think>
Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.`,
},
// NOTE: Servers automatically prepend a [img-<n>] tag
// {
// name: "Image",
// msgs: []api.Message{
// {Role: "user", Content: "Describe this image.", Images: []api.ImageData{api.ImageData(IMAGE2_BASE64)}},
// },
// expected: `<|im_start|>user
// [img-0]Describe this image.<|im_end|>
// <|im_start|>assistant
// <think>
// `,
// },
// NOTE: Servers automatically prepend a [img-<n>] tag
// {
// name: "Multiple images",
// msgs: []api.Message{
// {Role: "user", Content: "Describe these images.", Images: []api.ImageData{api.ImageData(IMAGE1_BASE64), api.ImageData(IMAGE2_BASE64)}},
// },
// expected: `<|im_start|>user
// [img-0][img-1]Describe these images.<|im_end|>
// <|im_start|>assistant
// <think>
// `,
// },
// NOTE: solved with #12518: https://github.com/ollama/ollama/compare/main...drifkin/stable-tool-args
// {
// name: "with tools and response",
// msgs: []api.Message{
// {Role: "system", Content: "You are a helpful assistant with access to tools."},
// {Role: "user", Content: "What's the weather like in New York?"},
// {
// Role: "assistant",
// Content: "I'll check the weather in New York for you.",
// ToolCalls: []api.ToolCall{
// {
// Function: api.ToolCallFunction{
// Name: "get-current-weather",
// Arguments: map[string]any{
// "location": "New York",
// "unit": "fahrenheit",
// },
// },
// },
// },
// },
// {Role: "tool", Content: "80", ToolName: "get-current-weather"},
// {Role: "user", Content: "That sounds nice! What about San Francisco?"},
// },
// tools: []api.Tool{
// {
// Type: "function",
// Function: api.ToolFunction{
// Name: "get-current-weather",
// Description: "Get the current weather for a location",
// Parameters: api.ToolFunctionParameters{
// Type: "object",
// Required: []string{"location"},
// Properties: map[string]api.ToolProperty{
// "location": {
// Type: api.PropertyType{"string"},
// Description: "The city and state, e.g. San Francisco, CA",
// },
// "unit": {
// Type: api.PropertyType{"string"},
// Enum: []any{"celsius", "fahrenheit"},
// Description: "The temperature unit",
// },
// },
// },
// },
// },
// },
// expected: `<|im_start|>system
// You are a helpful assistant with access to tools.
// # Tools
// You may call one or more functions to assist with the user query.
// You are provided with function signatures within <tools></tools> XML tags:
// <tools>
// {"type": "function", "function": {"name": "get-current-weather", "description": "Get the current weather for a location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit"}}, "required": ["location"]}}}
// </tools>
// For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
// <tool_call>
// {"name": <function-name>, "arguments": <args-json-object>}
// </tool_call><|im_end|>
// <|im_start|>user
// What's the weather like in New York?<|im_end|>
// <|im_start|>assistant
// I'll check the weather in New York for you.
// <tool_call>
// {"name": "get-current-weather", "arguments": {"location": "New York", "unit": "fahrenheit"}}
// </tool_call><|im_end|>
// <|im_start|>user
// <tool_response>
// 80
// </tool_response><|im_end|>
// <|im_start|>user
// That sounds nice! What about San Francisco?<|im_end|>
// <|im_start|>assistant
// <think>
// `,
// },
// NOTE: solved with #12518: https://github.com/ollama/ollama/compare/main...drifkin/stable-tool-args
// {
// name: "With tools and response, multiple tool calls",
// msgs: []api.Message{
// {
// Role: "system",
// Content: "You are a helpful assistant with access to tools.",
// },
// {
// Role: "user",
// Content: "Call two tools for me: add and multiply.",
// },
// {
// Role: "assistant",
// Content: "Sure, I'll call both tools for you.",
// ToolCalls: []api.ToolCall{
// {
// Function: api.ToolCallFunction{
// Name: "add",
// Arguments: map[string]any{
// "a": 2,
// "b": 3,
// },
// },
// },
// {
// Function: api.ToolCallFunction{
// Name: "multiply",
// Arguments: map[string]any{
// "x": 4,
// "y": 5,
// },
// },
// },
// },
// },
// {
// Role: "tool",
// Content: "5",
// ToolName: "add",
// },
// {
// Role: "tool",
// Content: "20",
// ToolName: "multiply",
// },
// {
// Role: "user",
// Content: "Thanks! What are the results?",
// },
// },
// tools: []api.Tool{
// {
// Type: "function",
// Function: api.ToolFunction{
// Name: "add",
// Description: "Add two numbers",
// Parameters: api.ToolFunctionParameters{
// Type: "object",
// Required: []string{"a", "b"},
// Properties: map[string]api.ToolProperty{
// "a": {Type: api.PropertyType{"integer"}, Description: "First number"},
// "b": {Type: api.PropertyType{"integer"}, Description: "Second number"},
// },
// },
// },
// },
// {
// Type: "function",
// Function: api.ToolFunction{
// Name: "multiply",
// Description: "Multiply two numbers",
// Parameters: api.ToolFunctionParameters{
// Type: "object",
// Required: []string{"x", "y"},
// Properties: map[string]api.ToolProperty{
// "x": {Type: api.PropertyType{"integer"}, Description: "First factor"},
// "y": {Type: api.PropertyType{"integer"}, Description: "Second factor"},
// },
// },
// },
// },
// },
// expected: `<|im_start|>system
// You are a helpful assistant with access to tools.
// # Tools
// You may call one or more functions to assist with the user query.
// You are provided with function signatures within <tools></tools> XML tags:
// <tools>
// {"type": "function", "function": {"name": "add", "description": "Add two numbers", "parameters": {"type": "object", "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}}, "required": ["a", "b"]}}}
// {"type": "function", "function": {"name": "multiply", "description": "Multiply two numbers", "parameters": {"type": "object", "properties": {"x": {"type": "integer"}, "y": {"type": "integer"}}, "required": ["x", "y"]}}}
// </tools>
// For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
// <tool_call>
// {"name": <function-name>, "arguments": <args-json-object>}
// </tool_call><|im_end|>
// <|im_start|>user
// Call two tools for me: add and multiply.<|im_end|>
// <|im_start|>assistant
// Sure, I'll call both tools for you.
// <tool_call>
// {"name": "add", "arguments": {"a": 2, "b": 3}}
// </tool_call>
// <tool_call>
// {"name": "multiply", "arguments": {"x": 4, "y": 5}}
// </tool_call><|im_end|>
// <|im_start|>user
// <tool_response>
// 5
// </tool_response>
// <tool_response>
// 20
// </tool_response><|im_end|>
// <|im_start|>user
// Thanks! What are the results?<|im_end|>
// <|im_start|>assistant
// <think>
// `,
// },
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rendered, err := (&Qwen3VLRenderer{true}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(rendered, tt.expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
}
// TestFormatToolCallArgumentThinkingVL verifies how formatToolCallArgument
// stringifies the argument kinds covered below: a string is expected back
// unchanged, a map as compact JSON, and numbers and booleans as their plain
// textual form.
func TestFormatToolCallArgumentThinkingVL(t *testing.T) {
	type testCase struct {
		label string
		input any
		want  string
	}

	cases := []testCase{
		{label: "string", input: "foo", want: "foo"},
		{label: "map", input: map[string]any{"foo": "bar"}, want: `{"foo":"bar"}`},
		{label: "number", input: 1, want: "1"},
		{label: "boolean", input: true, want: "true"},
	}

	// Each case runs as its own subtest so a failure is reported under its label.
	for i := range cases {
		tc := cases[i]
		t.Run(tc.label, func(t *testing.T) {
			if actual := formatToolCallArgument(tc.input); actual != tc.want {
				t.Errorf("formatToolCallArgument(%v) = %v, want %v", tc.input, actual, tc.want)
			}
		})
	}
}

View File

@@ -1,25 +1,19 @@
package renderers
import (
"fmt"
import "github.com/ollama/ollama/api"
"github.com/ollama/ollama/api"
)
type rendererFunc func([]api.Message, []api.Tool, *api.ThinkValue) (string, error)
func RenderWithRenderer(name string, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
renderer := rendererForName(name)
if renderer == nil {
return "", fmt.Errorf("unknown renderer %q", name)
}
return renderer(msgs, tools, think)
// Renderer is the common interface for the model-specific renderers that
// RendererForName hands out (e.g. *Qwen3CoderRenderer, *Qwen3VLRenderer).
type Renderer interface {
// Render takes the conversation messages, the available tool definitions and
// the think setting, and returns the rendered string or an error.
Render(messages []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error)
}
func rendererForName(name string) rendererFunc {
func RendererForName(name string) Renderer {
switch name {
case "qwen3-coder":
return Qwen3CoderRenderer
renderer := &Qwen3CoderRenderer{}
return renderer
case "qwen3-vl-instruct":
renderer := &Qwen3VLRenderer{false}
return renderer
default:
return nil
}