Grace/qwen3 thinking (#12647)

* changing initial status to take into consideration prefill

* Add seperate strings for content and thinking builder

* thinking tests

* remove white space from string before closing think tag
This commit is contained in:
Grace
2025-10-16 15:29:41 -07:00
committed by GitHub
parent 1813ff85a0
commit e2a0b24435
4 changed files with 231 additions and 15 deletions

View File

@@ -22,7 +22,6 @@ const (
thinkingCloseTag = "</think>"
)
// TODO(gguo): add a field for isThinking
type Qwen3VLParser struct {
state qwenParserState
buffer strings.Builder
@@ -34,21 +33,28 @@ func (p *Qwen3VLParser) HasToolSupport() bool {
return true
}
// TODO(gguo): changes this to reference an objects param
func (p *Qwen3VLParser) HasThinkingSupport() bool {
return p.hasThinkingSupport
}
func (p *Qwen3VLParser) initialState() qwenParserState {
if p.HasThinkingSupport() { // has thinking, start from collecting thinking content
return CollectingThinkingContent
func (p *Qwen3VLParser) setInitialState(lastMessage *api.Message) {
prefill := lastMessage != nil && lastMessage.Role == "assistant"
if !p.HasThinkingSupport() {
p.state = CollectingContent
return
}
return CollectingContent
if prefill && lastMessage.Content != "" {
p.state = CollectingContent
return
}
p.state = CollectingThinkingContent
}
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
p.tools = tools
p.state = p.initialState()
p.setInitialState(lastMessage)
return tools
}
@@ -63,7 +69,8 @@ func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking strin
events := p.parseEvents()
var toolCalls []api.ToolCall
var sb strings.Builder
var contentSb strings.Builder
var thinkingSb strings.Builder
for _, event := range events {
switch event := event.(type) {
case qwenEventRawToolCall:
@@ -74,15 +81,15 @@ func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking strin
}
toolCalls = append(toolCalls, toolCall)
case qwenEventThinkingContent:
sb.WriteString(event.content)
thinkingSb.WriteString(event.content)
case qwenEventContent:
// TODO(drifkin): if the same turn contains multiple interleaved content
// events, we naively append them together here.
sb.WriteString(event.content)
contentSb.WriteString(event.content)
}
}
return sb.String(), "", toolCalls, nil
return contentSb.String(), thinkingSb.String(), toolCalls, nil
}
func (p *Qwen3VLParser) parseEvents() []qwenEvent {
@@ -155,7 +162,7 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
case CollectingToolContent:
if strings.Contains(p.buffer.String(), toolCloseTag) {
split := strings.SplitN(p.buffer.String(), toolCloseTag, 2)
before := split[0]
before := split[0] // do we also need to do it to tool calls?
if len(before) == 0 {
slog.Warn("qwen tool call closing tag found but no content before it")
}
@@ -169,10 +176,11 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
} else {
return events, false
}
case CollectingThinkingContent: // so we want to hip the unambiguous stuff
case CollectingThinkingContent:
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
split := strings.SplitN(p.buffer.String(), thinkingCloseTag, 2)
before := split[0]
// before := split[0]
before := strings.TrimRightFunc(split[0], unicode.IsSpace)
if len(before) == 0 {
slog.Warn("qwen tool call closing tag found but no content before it")
}
@@ -184,7 +192,7 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
p.buffer.WriteString(after)
p.state = CollectingContent
return events, true
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 { // we see part of a close thinking tag
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 {
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen