Merge pull request #12248 from ollama/drifkin/qwen3-coder-parsing

add qwen3-coder tool support
Devon Rifkin
2025-09-16 10:21:43 -07:00
committed by GitHub
15 changed files with 2012 additions and 57 deletions

View File

@@ -323,6 +323,8 @@ func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML,
 		RootFS: RootFS{
 			Type: "layers",
 		},
+		Renderer: r.Renderer,
+		Parser:   r.Parser,
 	}

 	var layers []Layer
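
The renderer and parser names flow in from the create request, so a client can set them when a model is built. A hedged sketch of such a request body (the lowercase `renderer`/`parser` JSON field names are assumptions inferred from the `r.Renderer`/`r.Parser` accesses above; other fields omitted):

    POST /api/create
    {
      "model": "my-coder",
      "from": "qwen3-coder",
      "renderer": "qwen3-coder",
      "parser": "qwen3-coder"
    }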

View File

@@ -24,6 +24,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/fs/gguf"
+	"github.com/ollama/ollama/model/parsers"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/thinking"
@@ -94,8 +95,9 @@ func (m *Model) Capabilities() []model.Capability {
 		return capabilities
 	}

+	builtinParser := parsers.ParserForName(m.Config.Parser)
+
 	// Check for tools capability
-	if slices.Contains(m.Template.Vars(), "tools") {
+	if slices.Contains(m.Template.Vars(), "tools") || (builtinParser != nil && builtinParser.HasToolSupport()) {
 		capabilities = append(capabilities, model.CapabilityTools)
 	}
@@ -112,7 +114,8 @@ func (m *Model) Capabilities() []model.Capability {
 	// Check for thinking capability
 	openingTag, closingTag := thinking.InferTags(m.Template.Template)
 	hasTags := openingTag != "" && closingTag != ""
-	if hasTags || slices.Contains([]string{"gptoss", "gpt-oss"}, m.Config.ModelFamily) {
+	isGptoss := slices.Contains([]string{"gptoss", "gpt-oss"}, m.Config.ModelFamily)
+	if hasTags || isGptoss || (builtinParser != nil && builtinParser.HasThinkingSupport()) {
 		capabilities = append(capabilities, model.CapabilityThinking)
 	}
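
Read together, these call sites pin down the shape a built-in parser has to have. A minimal Go sketch of that interface, inferred purely from how it is used in this diff rather than copied from model/parsers (the real definition may differ):

    package parsers

    import "github.com/ollama/ollama/api"

    // Parser incrementally consumes raw model output and splits it into
    // plain content, thinking traces, and structured tool calls.
    type Parser interface {
    	// Add feeds one streamed chunk and returns whatever is ready to emit.
    	Add(s string, tools []api.Tool) (content, thinking string, calls []api.ToolCall, err error)
    	HasToolSupport() bool
    	HasThinkingSupport() bool
    }

    // ParserForName returns the parser registered under name, or nil if none.
    func ParserForName(name string) Parser { /* registry lookup elided */ return nil }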
@@ -198,6 +201,20 @@ func (m *Model) String() string {
 		})
 	}

+	if m.Config.Renderer != "" {
+		modelfile.Commands = append(modelfile.Commands, parser.Command{
+			Name: "renderer",
+			Args: m.Config.Renderer,
+		})
+	}
+
+	if m.Config.Parser != "" {
+		modelfile.Commands = append(modelfile.Commands, parser.Command{
+			Name: "parser",
+			Args: m.Config.Parser,
+		})
+	}
+
 	for k, v := range m.Options {
 		switch v := v.(type) {
 		case []any:
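
Emitting these commands means the settings survive a Modelfile round trip. A hypothetical Modelfile using them (the model and renderer/parser names here are illustrative, not taken from this diff):

    FROM qwen3-coder
    RENDERER qwen3-coder
    PARSER qwen3-coder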
@@ -238,6 +255,8 @@ type ConfigV2 struct {
 	ModelFamilies []string `json:"model_families"`
 	ModelType     string   `json:"model_type"`
 	FileType      string   `json:"file_type"`
+	Renderer      string   `json:"renderer,omitempty"`
+	Parser        string   `json:"parser,omitempty"`

 	// required by spec
 	Architecture string `json:"architecture"`
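
In the stored image config these appear as two optional keys; thanks to omitempty, models that never set them serialize exactly as before. An abridged sketch of the resulting JSON (field values are illustrative):

    {
      "model_family": "qwen3",
      "model_type": "30B",
      "file_type": "Q4_K_M",
      "renderer": "qwen3-coder",
      "parser": "qwen3-coder",
      "architecture": "amd64"
    }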

View File

@@ -11,6 +11,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/model/renderers"
 	"github.com/ollama/ollama/template"
 )
@@ -41,18 +42,12 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 			}
 		}

-		thinkVal := false
-		thinkLevel := ""
-		if think != nil {
-			thinkVal = think.Bool()
-			thinkLevel = think.String()
-		}
-		var b bytes.Buffer
-		if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...), Tools: tools, Think: thinkVal, ThinkLevel: thinkLevel, IsThinkSet: think != nil}); err != nil {
+		p, err := renderPrompt(m, append(system, msgs[i:]...), tools, think)
+		if err != nil {
 			return "", nil, err
 		}

-		s, err := tokenize(ctx, b.String())
+		s, err := tokenize(ctx, p)
 		if err != nil {
 			return "", nil, err
 		}
@@ -101,6 +96,23 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 	}

 	// truncate any messages that do not fit into the context window
+	p, err := renderPrompt(m, append(system, msgs[currMsgIdx:]...), tools, think)
+	if err != nil {
+		return "", nil, err
+	}
+
+	return p, images, nil
+}
+
+func renderPrompt(m *Model, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
+	if m.Config.Renderer != "" {
+		rendered, err := renderers.RenderWithRenderer(m.Config.Renderer, msgs, tools, think)
+		if err != nil {
+			return "", err
+		}
+		return rendered, nil
+	}
+
 	var b bytes.Buffer
 	thinkVal := false
 	thinkLevel := ""
@@ -108,9 +120,8 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 		thinkVal = think.Bool()
 		thinkLevel = think.String()
 	}
-	if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[currMsgIdx:]...), Tools: tools, Think: thinkVal, ThinkLevel: thinkLevel, IsThinkSet: think != nil}); err != nil {
-		return "", nil, err
+	if err := m.Template.Execute(&b, template.Values{Messages: msgs, Tools: tools, Think: thinkVal, ThinkLevel: thinkLevel, IsThinkSet: think != nil}); err != nil {
+		return "", err
 	}
-	return b.String(), images, nil
+	return b.String(), nil
 }
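
renderPrompt is now the single choke point for prompt construction: a registered Go renderer wins, otherwise the model's template runs as before. A sketch of the registry shape that RenderWithRenderer implies (illustrative only; the map and registration mechanism are assumptions, not the actual model/renderers code — only the function signature is taken from the diff above):

    package renderers

    import (
    	"fmt"

    	"github.com/ollama/ollama/api"
    )

    // Assumed registry: renderer name -> rendering function.
    var registry = map[string]func([]api.Message, []api.Tool, *api.ThinkValue) (string, error){}

    // RenderWithRenderer dispatches to a registered renderer by name,
    // matching the call in renderPrompt above.
    func RenderWithRenderer(name string, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
    	r, ok := registry[name]
    	if !ok {
    		return "", fmt.Errorf("unknown renderer %q", name)
    	}
    	return r(msgs, tools, think)
    }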

View File

@@ -35,6 +35,7 @@ import (
 	"github.com/ollama/ollama/harmony"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/logutil"
+	"github.com/ollama/ollama/model/parsers"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/server/internal/client/ollama"
 	"github.com/ollama/ollama/server/internal/registry"
@@ -329,10 +330,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	// If debug mode is enabled, return the rendered template instead of calling the model
 	if req.DebugRenderOnly {
-		c.JSON(http.StatusOK, api.DebugTemplateResponse{
+		c.JSON(http.StatusOK, api.GenerateResponse{
 			Model:     req.Model,
 			CreatedAt: time.Now().UTC(),
-			DebugInfo: api.DebugInfo{
+			DebugInfo: &api.DebugInfo{
 				RenderedTemplate: prompt,
 				ImageCount:       len(images),
 			},
@@ -1625,10 +1626,15 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	}

 	msgs = filterThinkTags(msgs, m)

+	var builtinParser parsers.Parser
+	if m.Config.Parser != "" {
+		builtinParser = parsers.ParserForName(m.Config.Parser)
+	}
+
 	var harmonyMessageHandler *harmony.HarmonyMessageHandler
 	var harmonyToolParser *harmony.HarmonyToolCallAccumulator

-	useHarmony := shouldUseHarmony(m)
+	useHarmony := shouldUseHarmony(m) || m.Config.Parser == "harmony"
 	processedTools := req.Tools

 	if useHarmony {
@@ -1658,10 +1664,10 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	// If debug mode is enabled, return the rendered template instead of calling the model
 	if req.DebugRenderOnly {
-		c.JSON(http.StatusOK, api.DebugTemplateResponse{
+		c.JSON(http.StatusOK, api.ChatResponse{
 			Model:     req.Model,
 			CreatedAt: time.Now().UTC(),
-			DebugInfo: api.DebugInfo{
+			DebugInfo: &api.DebugInfo{
 				RenderedTemplate: prompt,
 				ImageCount:       len(images),
 			},
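
The debug path can be exercised directly; the response is now an ordinary ChatResponse whose DebugInfo pointer is populated only in this mode. An example request, assuming `_debug_render_only` is the JSON tag behind req.DebugRenderOnly (an assumption; this diff does not show the tag):

    curl http://localhost:11434/api/chat -d '{
      "model": "qwen3-coder",
      "messages": [{"role": "user", "content": "hi"}],
      "_debug_render_only": true
    }'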
@@ -1721,6 +1727,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 			res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
 		}

+		// TODO(drifkin): fold this as much as possible into the generic m.Config.Parser logic
 		if useHarmony {
 			content, thinking, toolContent := harmonyMessageHandler.AddContent(r.Content, harmonyToolParser)
 			res.Message.Content = content
@@ -1747,6 +1754,27 @@ func (s *Server) ChatHandler(c *gin.Context) {
 				ch <- res
 			}
 			return
-		}
+		} else if builtinParser != nil {
+			slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser input", "parser", m.Config.Parser, "content", r.Content)
+
+			content, thinking, toolCalls, err := builtinParser.Add(r.Content, req.Tools)
+			if err != nil {
+				ch <- gin.H{"error": err.Error()}
+				return
+			}
+
+			res.Message.Content = content
+			res.Message.Thinking = thinking
+			res.Message.ToolCalls = toolCalls
+
+			if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || r.Done {
+				slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser output", "parser", m.Config.Parser, "content", content, "thinking", thinking, "toolCalls", toolCalls, "done", r.Done)
+				ch <- res
+			} else {
+				slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser empty output", "parser", m.Config.Parser)
+			}
+
+			return
+		}
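
To make that contract concrete: Add is called once per streamed chunk, so a parser must buffer across chunk boundaries and only release text it knows is not part of an unfinished tool call. Below is a self-contained sketch of that buffering discipline for a Qwen3-Coder-style <tool_call> format. It is illustrative, not the parser shipped in this PR: the tag names, the simplified regex approach, and the ToolCall type are assumptions.

    package main

    import (
    	"fmt"
    	"regexp"
    	"strings"
    )

    type ToolCall struct {
    	Name string
    	Args map[string]string
    }

    type sketchParser struct {
    	buf strings.Builder // carries unconsumed text across Add calls
    }

    var (
    	callRE  = regexp.MustCompile(`(?s)<tool_call>(.*?)</tool_call>`)
    	fnRE    = regexp.MustCompile(`(?s)<function=([^>]+)>(.*?)</function>`)
    	paramRE = regexp.MustCompile(`(?s)<parameter=([^>]+)>\s*(.*?)\s*</parameter>`)
    )

    // Add accepts one streamed chunk and returns content that is safe to emit
    // plus any tool calls whose closing tags have now been seen.
    func (p *sketchParser) Add(chunk string) (content string, calls []ToolCall) {
    	p.buf.WriteString(chunk)
    	s := p.buf.String()
    	// Drain every completed <tool_call>...</tool_call> span.
    	for {
    		m := callRE.FindStringSubmatchIndex(s)
    		if m == nil {
    			break
    		}
    		content += s[:m[0]]
    		inner := s[m[2]:m[3]]
    		if fn := fnRE.FindStringSubmatch(inner); fn != nil {
    			call := ToolCall{Name: fn[1], Args: map[string]string{}}
    			for _, pm := range paramRE.FindAllStringSubmatch(fn[2], -1) {
    				call.Args[pm[1]] = pm[2]
    			}
    			calls = append(calls, call)
    		}
    		s = s[m[1]:]
    	}
    	// Hold back an unfinished tool call; a production parser would also
    	// hold back a partial "<tool_call" prefix at the very tail.
    	if i := strings.Index(s, "<tool_call>"); i >= 0 {
    		content += s[:i]
    		s = s[i:]
    	} else {
    		content += s
    		s = ""
    	}
    	p.buf.Reset()
    	p.buf.WriteString(s)
    	return content, calls
    }

    func main() {
    	p := &sketchParser{}
    	c1, _ := p.Add("Sure. <tool_call><function=get_weather>")
    	c2, calls := p.Add("<parameter=city>Paris</parameter></function></tool_call>")
    	fmt.Println(c1+c2, calls) // "Sure. " [{get_weather map[city:Paris]}]
    }

Note how the first Add call emits only "Sure. " and withholds the open <tool_call>; the handler's empty-output branch above exists precisely so such held-back chunks do not produce empty stream events.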

View File

@@ -180,7 +180,7 @@ func TestGenerateDebugRenderOnly(t *testing.T) {
 		t.Errorf("expected status %d, got %d, body: %s", http.StatusOK, w.Code, w.Body.String())
 	}

-	var response api.DebugTemplateResponse
+	var response api.GenerateResponse
 	if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil {
 		t.Fatalf("failed to unmarshal response: %v", err)
 	}
@@ -385,7 +385,7 @@ func TestChatDebugRenderOnly(t *testing.T) {
 		t.Errorf("expected status %d, got %d, body: %s", http.StatusOK, w.Code, w.Body.String())
 	}

-	var response api.DebugTemplateResponse
+	var response api.ChatResponse
 	if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil {
 		t.Fatalf("failed to unmarshal response: %v", err)
 	}