DeepseekV3 Family Parser (#13484)

2025-12-21 14:26:30 +00:00 · 2025-12-16 18:56:30 -08:00
parent f6a016f49d
commit a013693f80
6 changed files with 1099 additions and 6 deletions
--- a/model/parsers/deepseek3.go
+++ b/model/parsers/deepseek3.go
@@ -0,0 +1,292 @@
+package parsers
+
+import (
+	"encoding/json"
+	"errors"
+	"log/slog"
+	"strings"
+	"unicode"
+
+	"github.com/ollama/ollama/api"
+)
+
+// DeepSeek3ParserState identifies which kind of output DeepSeek3Parser is
+// currently collecting from its buffer.
+type DeepSeek3ParserState int
+
+const (
+	// DeepSeekCollectingThinking treats buffered text as thinking until the
+	// closing think tag is seen. It is the zero value of the type.
+	DeepSeekCollectingThinking DeepSeek3ParserState = iota
+	// DeepSeekCollectingContent treats buffered text as regular content; a
+	// tool-calls or tool-output begin tag switches to the matching state.
+	DeepSeekCollectingContent
+	// DeepSeekCollectingToolCalls is active between the tool-calls begin and
+	// end tags, where individual tool calls are parsed.
+	DeepSeekCollectingToolCalls
+	// DeepSeekCollectingToolOutput is active between the tool-output begin
+	// and end tags; the enclosed text is emitted as content.
+	DeepSeekCollectingToolOutput
+)
+
+// Literal marker strings the parser searches for in the model output. The
+// tool tags use the fullwidth bar (U+FF5C) and the lower-one-eighth block
+// (U+2581), not ASCII '|' and '_'.
+const (
+	// deepseekThinkingCloseTag ends the thinking section.
+	deepseekThinkingCloseTag   = "</think>"
+	// deepseekToolCallsBeginTag / deepseekToolCallsEndTag wrap a section of tool calls.
+	deepseekToolCallsBeginTag  = "<｜tool▁calls▁begin｜>"
+	deepseekToolCallsEndTag    = "<｜tool▁calls▁end｜>"
+	// deepseekToolCallBeginTag / deepseekToolCallEndTag wrap one tool call.
+	deepseekToolCallBeginTag   = "<｜tool▁call▁begin｜>"
+	deepseekToolCallEndTag     = "<｜tool▁call▁end｜>"
+	// deepseekToolSepTag separates the tool name from its JSON arguments.
+	deepseekToolSepTag         = "<｜tool▁sep｜>"
+	// deepseekToolOutputBeginTag / deepseekToolOutputEndTag wrap tool output text.
+	deepseekToolOutputBeginTag = "<｜tool▁output▁begin｜>"
+	deepseekToolOutputEndTag   = "<｜tool▁output▁end｜>"
+)
+
+// DeepSeek3Parser incrementally splits DeepSeek model output into thinking
+// text, regular content and tool calls.
+type DeepSeek3Parser struct {
+	// state is the section of output currently being collected.
+	state              DeepSeek3ParserState
+	// buffer holds text that has been added but not yet emitted or discarded.
+	buffer             strings.Builder
+	// hasThinkingSupport is the value reported by HasThinkingSupport and is
+	// consulted when choosing the initial state.
+	hasThinkingSupport bool
+}
+
+// HasToolSupport always reports true: this parser extracts tool calls.
+func (p *DeepSeek3Parser) HasToolSupport() bool {
+	return true
+}
+
+// HasThinkingSupport reports the hasThinkingSupport flag the parser was
+// constructed with.
+func (p *DeepSeek3Parser) HasThinkingSupport() bool {
+	return p.hasThinkingSupport
+}
+
+// setInitialState picks the state the parser starts in.
+//
+// The parser starts collecting thinking only when the parser supports
+// thinking, the request asks for it (thinkValue is non-nil and true), and the
+// conversation does not end with an assistant message that already has
+// content. In every other case it starts collecting content. The tools
+// argument is accepted but not consulted.
+func (p *DeepSeek3Parser) setInitialState(lastMessage *api.Message, tools []api.Tool, thinkValue *api.ThinkValue) {
+	// An assistant message at the end of the history is a prefill.
+	isPrefill := lastMessage != nil && lastMessage.Role == "assistant"
+
+	// Both the model capability and the request preference must agree.
+	wantThinking := p.HasThinkingSupport() && (thinkValue != nil && thinkValue.Bool())
+
+	switch {
+	case !wantThinking, isPrefill && lastMessage.Content != "":
+		p.state = DeepSeekCollectingContent
+	default:
+		p.state = DeepSeekCollectingThinking
+	}
+}
+
+// Init chooses the parser's starting state from lastMessage and thinkValue
+// (see setInitialState) and returns tools unchanged.
+func (p *DeepSeek3Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
+	p.setInitialState(lastMessage, tools, thinkValue)
+	return tools
+}
+
+// deepseekEvent is the closed set of events produced while parsing; the
+// unexported marker method restricts implementations to this package.
+type deepseekEvent interface {
+	isDeepSeekEvent()
+}
+
+// deepseekEventThinkingContent carries a piece of thinking text.
+type deepseekEventThinkingContent struct {
+	content string
+}
+
+// deepseekEventContent carries a piece of regular content text.
+type deepseekEventContent struct {
+	content string
+}
+
+// deepseekEventToolCall carries one successfully parsed tool call.
+type deepseekEventToolCall struct {
+	toolCall api.ToolCall
+}
+
+// Marker methods that make the three event types satisfy deepseekEvent.
+func (deepseekEventThinkingContent) isDeepSeekEvent() {}
+func (deepseekEventContent) isDeepSeekEvent()         {}
+func (deepseekEventToolCall) isDeepSeekEvent()        {}
+
+// Add appends s to the parser's buffer, parses whatever can be parsed so far,
+// and returns the newly produced content text, thinking text and tool calls.
+//
+// Text that cannot be classified yet stays buffered for a later call. The
+// done flag is accepted but not consulted, and err is always nil.
+func (p *DeepSeek3Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
+	p.buffer.WriteString(s)
+
+	var (
+		visible   strings.Builder
+		reasoning strings.Builder
+	)
+	for _, ev := range p.parseEvents() {
+		switch ev := ev.(type) {
+		case deepseekEventContent:
+			visible.WriteString(ev.content)
+		case deepseekEventThinkingContent:
+			reasoning.WriteString(ev.content)
+		case deepseekEventToolCall:
+			calls = append(calls, ev.toolCall)
+		}
+	}
+
+	return visible.String(), reasoning.String(), calls, nil
+}
+
+// parseEvents calls eat repeatedly, gathering its events in order, until eat
+// reports that no further progress can be made with the buffered text.
+func (p *DeepSeek3Parser) parseEvents() []deepseekEvent {
+	var collected []deepseekEvent
+
+	for {
+		batch, more := p.eat()
+		collected = append(collected, batch...)
+		if !more {
+			break
+		}
+	}
+
+	return collected
+}
+
+// eat performs one parsing step on the buffered text according to the current
+// state. It returns the events produced by that step and whether the caller
+// should call eat again right away (true after a tag was consumed; false when
+// the buffer is empty or more input is needed to decide).
+func (p *DeepSeek3Parser) eat() ([]deepseekEvent, bool) {
+	var events []deepseekEvent
+	bufStr := p.buffer.String()
+	if bufStr == "" {
+		return events, false
+	}
+
+	// rebuffer replaces the buffer with the text that is still unprocessed.
+	rebuffer := func(rest string) {
+		p.buffer.Reset()
+		p.buffer.WriteString(rest)
+	}
+
+	switch p.state {
+	case DeepSeekCollectingThinking:
+		// thinking[</think>] -> content
+		if thinking, remaining, found := strings.Cut(bufStr, deepseekThinkingCloseTag); found {
+			// Whitespace on either side of the close tag is dropped.
+			thinking = strings.TrimRightFunc(thinking, unicode.IsSpace)
+			rebuffer(strings.TrimLeftFunc(remaining, unicode.IsSpace))
+			p.state = DeepSeekCollectingContent
+
+			if len(thinking) > 0 {
+				events = append(events, deepseekEventThinkingContent{content: thinking})
+			}
+			return events, true
+		}
+
+		// No complete close tag yet. Hold back a possible partial "</think>"
+		// at the end of the buffer together with the whitespace in front of
+		// it, because that whitespace is trimmed if the tag completes. When
+		// there is no partial tag, only trailing whitespace is held back.
+		beforePartialTag := bufStr
+		if overlapLen := overlap(bufStr, deepseekThinkingCloseTag); overlapLen > 0 {
+			beforePartialTag = bufStr[:len(bufStr)-overlapLen]
+		}
+		ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen(beforePartialTag)
+
+		unambiguous := bufStr[:ambiguousStart]
+		rebuffer(bufStr[ambiguousStart:])
+		if len(unambiguous) > 0 {
+			events = append(events, deepseekEventThinkingContent{content: unambiguous})
+		}
+		return events, false
+
+	case DeepSeekCollectingContent:
+		// content[<｜tool▁calls▁begin｜>] -> tool calls
+		if before, remaining, found := strings.Cut(bufStr, deepseekToolCallsBeginTag); found {
+			contentBefore := strings.TrimRightFunc(before, unicode.IsSpace)
+			rebuffer(remaining)
+			p.state = DeepSeekCollectingToolCalls
+
+			if len(contentBefore) > 0 {
+				events = append(events, deepseekEventContent{content: contentBefore})
+			}
+			return events, true
+		}
+
+		// content[<｜tool▁output▁begin｜>] -> tool output
+		if before, remaining, found := strings.Cut(bufStr, deepseekToolOutputBeginTag); found {
+			// Whitespace before the tag is preserved here, unlike above.
+			rebuffer(remaining)
+			p.state = DeepSeekCollectingToolOutput
+
+			if len(before) > 0 {
+				events = append(events, deepseekEventContent{content: before})
+			}
+			return events, true
+		}
+
+		// Otherwise everything buffered is content. bufStr is known to be
+		// non-empty at this point.
+		p.buffer.Reset()
+		events = append(events, deepseekEventContent{content: bufStr})
+		return events, false
+
+	case DeepSeekCollectingToolCalls:
+		// NOTE(review): the call-begin tag is searched for before the
+		// calls-end tag regardless of which comes first in the buffer, so
+		// text between two back-to-back tool-call sections is skipped.
+		// Existing tests rely on this; confirm it is intended.
+		if idx := strings.Index(bufStr, deepseekToolCallBeginTag); idx != -1 {
+			startIdx := idx + len(deepseekToolCallBeginTag)
+			if endIdx := strings.Index(bufStr[startIdx:], deepseekToolCallEndTag); endIdx != -1 {
+				toolCallContent := bufStr[startIdx : startIdx+endIdx]
+				remaining := bufStr[startIdx+endIdx+len(deepseekToolCallEndTag):]
+
+				// Consume the call whether or not it parses. Leaving a
+				// malformed call in the buffer would re-parse (and re-log)
+				// it on every Add and would discard any valid calls that
+				// follow it once the section end tag arrives.
+				rebuffer(strings.TrimLeftFunc(remaining, unicode.IsSpace))
+
+				toolCall, err := p.parseToolCallContent(toolCallContent)
+				if err != nil {
+					slog.Warn("deepseek tool call parsing failed", "error", err)
+					return events, true
+				}
+
+				events = append(events, deepseekEventToolCall{toolCall: toolCall})
+				return events, true
+			}
+		}
+
+		// [<｜tool▁calls▁end｜>] -> content; anything before the tag is dropped.
+		if idx := strings.Index(bufStr, deepseekToolCallsEndTag); idx != -1 {
+			remaining := bufStr[idx+len(deepseekToolCallsEndTag):]
+			rebuffer(strings.TrimLeftFunc(remaining, unicode.IsSpace))
+			p.state = DeepSeekCollectingContent
+
+			return events, true
+		}
+
+		// Incomplete tool call: wait for more input.
+		return events, false
+
+	case DeepSeekCollectingToolOutput:
+		// tool output[<｜tool▁output▁end｜>] -> content
+		if toolOutputContent, remaining, found := strings.Cut(bufStr, deepseekToolOutputEndTag); found {
+			// Whitespace after the tag is preserved.
+			rebuffer(remaining)
+			p.state = DeepSeekCollectingContent
+
+			if len(toolOutputContent) > 0 {
+				events = append(events, deepseekEventContent{content: toolOutputContent})
+			}
+			return events, true
+		}
+
+		// End tag not seen yet: keep buffering.
+		return events, false
+	}
+
+	return events, false
+}
+
+// parseToolCallContent decodes the text found between the tool-call begin and
+// end tags, which is expected to look like: tool_name<｜tool▁sep｜>{args}
+//
+// It returns an "invalid format" error when the separator tag is missing and
+// the JSON decoding error when the arguments are not valid JSON. Whitespace
+// around the name and the arguments is ignored; an empty name is accepted.
+func (p *DeepSeek3Parser) parseToolCallContent(content string) (api.ToolCall, error) {
+	rawName, rawArgs, hasSep := strings.Cut(content, deepseekToolSepTag)
+	if !hasSep {
+		return api.ToolCall{}, errors.New("invalid format")
+	}
+
+	var args api.ToolCallFunctionArguments
+	err := json.Unmarshal([]byte(strings.TrimSpace(rawArgs)), &args)
+	if err != nil {
+		return api.ToolCall{}, err
+	}
+
+	call := api.ToolCall{
+		Function: api.ToolCallFunction{
+			Name:      strings.TrimSpace(rawName),
+			Arguments: args,
+		},
+	}
+	return call, nil
+}
--- a/model/parsers/deepseek3_test.go
+++ b/model/parsers/deepseek3_test.go
@@ -0,0 +1,721 @@
+package parsers
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+
+	"github.com/ollama/ollama/api"
+)
+
+// TestDeepSeekParser feeds each input to a fresh parser in a single Add call
+// and compares the returned content, thinking and tool calls with the
+// expected values. hasThinking sets both the parser's thinking capability and
+// the think value passed to Init.
+func TestDeepSeekParser(t *testing.T) {
+	tests := []struct {
+		name             string
+		input            string
+		expectedContent  string
+		expectedThinking string
+		expectedCalls    []api.ToolCall
+		hasThinking      bool
+	}{
+		{
+			name:            "simple_content",
+			input:           "Hello, how are you?",
+			expectedContent: "Hello, how are you?",
+			hasThinking:     false,
+		},
+		{
+			name:             "thinking_content",
+			input:            "I need to think about this...</think>The answer is 42.",
+			expectedThinking: "I need to think about this...",
+			expectedContent:  "The answer is 42.",
+			hasThinking:      true,
+		},
+		{
+			name:            "no_thinking_simple",
+			input:           "Just a regular response.",
+			expectedContent: "Just a regular response.",
+			hasThinking:     false,
+		},
+		{
+			name:             "thinking_with_newlines",
+			input:            "Let me think:\n- Point 1\n- Point 2</think>\n\nHere's my answer.",
+			expectedThinking: "Let me think:\n- Point 1\n- Point 2",
+			expectedContent:  "Here's my answer.",
+			hasThinking:      true,
+		},
+		{
+			name:            "tool_call_simple",
+			input:           "I'll check the weather.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"location\":\"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
+			expectedContent: "I'll check the weather.",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: api.ToolCallFunctionArguments{
+							"location": "Paris",
+						},
+					},
+				},
+			},
+			hasThinking: false,
+		},
+		{
+			name:            "multiple_tool_calls",
+			input:           "Getting weather for both cities.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"location\":\"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"location\":\"London\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
+			expectedContent: "Getting weather for both cities.",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: api.ToolCallFunctionArguments{
+							"location": "Paris",
+						},
+					},
+				},
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: api.ToolCallFunctionArguments{
+							"location": "London",
+						},
+					},
+				},
+			},
+			hasThinking: false,
+		},
+		{
+			name:            "tool_output",
+			input:           "Here's the weather: <｜tool▁output▁begin｜>Temperature: 22°C, Sunny<｜tool▁output▁end｜> Hope that helps!",
+			expectedContent: "Here's the weather: Temperature: 22°C, Sunny Hope that helps!",
+			hasThinking:     false,
+		},
+		{
+			name:            "complex_tool_arguments",
+			input:           "Processing data.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>process_data<｜tool▁sep｜>{\"items\":[\"item1\",\"item2\"],\"config\":{\"enabled\":true,\"threshold\":0.95}}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
+			expectedContent: "Processing data.",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "process_data",
+						Arguments: api.ToolCallFunctionArguments{
+							"items":  []interface{}{"item1", "item2"},
+							"config": map[string]interface{}{"enabled": true, "threshold": 0.95},
+						},
+					},
+				},
+			},
+			hasThinking: false,
+		},
+		{
+			name:             "thinking_with_tool_call", // technically this can't happen, but the parser can handle it
+			input:            "Let me check the weather...</think>I'll get that for you.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"location\":\"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
+			expectedThinking: "Let me check the weather...",
+			expectedContent:  "I'll get that for you.",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: api.ToolCallFunctionArguments{
+							"location": "Paris",
+						},
+					},
+				},
+			},
+			hasThinking: true,
+		},
+		{
+			name:            "empty_content",
+			input:           "",
+			expectedContent: "",
+			hasThinking:     false,
+		},
+		{
+			name:             "only_thinking",
+			input:            "Just thinking content</think>",
+			expectedThinking: "Just thinking content",
+			expectedContent:  "",
+			hasThinking:      true,
+		},
+		{
+			name:            "multiple_tool_outputs",
+			input:           "Results: <｜tool▁output▁begin｜>Paris: 22°C<｜tool▁output▁end｜> and <｜tool▁output▁begin｜>London: 18°C<｜tool▁output▁end｜>",
+			expectedContent: "Results: Paris: 22°C and London: 18°C",
+			hasThinking:     false,
+		},
+		{
+			name:            "unicode_content",
+			input:           "مرحبا بالعالم! 你好世界! 🌍",
+			expectedContent: "مرحبا بالعالم! 你好世界! 🌍",
+			hasThinking:     false,
+		},
+		{
+			name:            "emoji_passthrough",
+			input:           "Task completed ✅ 🎉",
+			expectedContent: "Task completed ✅ 🎉",
+			hasThinking:     false,
+		},
+		{
+			name:            "emoji_after_tool_call",
+			input:           "I'll help you.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"location\":\"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>完成 ✅",
+			expectedContent: "I'll help you.完成 ✅",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: api.ToolCallFunctionArguments{
+							"location": "Tokyo",
+						},
+					},
+				},
+			},
+			hasThinking: false,
+		},
+		{
+			name:            "newlines_and_whitespace",
+			input:           "Line 1\n\nLine 3\t\tTabbed content",
+			expectedContent: "Line 1\n\nLine 3\t\tTabbed content",
+			hasThinking:     false,
+		},
+		{
+			name:             "thinking_with_unicode",
+			input:            "我在思考这个问题...</think>答案是42。",
+			expectedThinking: "我在思考这个问题...",
+			expectedContent:  "答案是42。",
+			hasThinking:      true,
+		},
+		{
+			name:            "tool_call_with_unicode_args",
+			input:           "Searching for information.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>search<｜tool▁sep｜>{\"query\":\"北京天气\",\"language\":\"中文\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
+			expectedContent: "Searching for information.",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "search",
+						Arguments: api.ToolCallFunctionArguments{
+							"query":    "北京天气",
+							"language": "中文",
+						},
+					},
+				},
+			},
+			hasThinking: false,
+		},
+		{
+			name:            "tool_output_with_unicode",
+			input:           "天气信息: <｜tool▁output▁begin｜>北京: 25°C, 晴天<｜tool▁output▁end｜> 希望对您有帮助!",
+			expectedContent: "天气信息: 北京: 25°C, 晴天 希望对您有帮助!",
+			hasThinking:     false,
+		},
+		{
+			name:            "mixed_content_with_special_chars",
+			input:           "Price: $100 & tax @ 10% = $110 <｜tool▁output▁begin｜>Total: $110<｜tool▁output▁end｜> (final)",
+			expectedContent: "Price: $100 & tax @ 10% = $110 Total: $110 (final)",
+			hasThinking:     false,
+		},
+		{
+			name:            "tool_call_with_special_chars",
+			input:           "Processing data.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>execute_command<｜tool▁sep｜>{\"command\":\"ls && echo \\\"done\\\"\",\"path\":\"/home/user\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
+			expectedContent: "Processing data.",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "execute_command",
+						Arguments: api.ToolCallFunctionArguments{
+							"command": "ls && echo \"done\"",
+							"path":    "/home/user",
+						},
+					},
+				},
+			},
+			hasThinking: false,
+		},
+		{
+			name:             "thinking_with_special_chars",
+			input:            "Let me calculate: 2+2=4 & 3*3=9...</think>The results are correct!",
+			expectedThinking: "Let me calculate: 2+2=4 & 3*3=9...",
+			expectedContent:  "The results are correct!",
+			hasThinking:      true,
+		},
+		{
+			name:            "empty_tool_call_args",
+			input:           "Pinging server.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>ping<｜tool▁sep｜>{}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
+			expectedContent: "Pinging server.",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name:      "ping",
+						Arguments: api.ToolCallFunctionArguments{},
+					},
+				},
+			},
+			hasThinking: false,
+		},
+		{
+			name:            "empty_tool_output",
+			input:           "Checking status: <｜tool▁output▁begin｜><｜tool▁output▁end｜> No output received.",
+			expectedContent: "Checking status:  No output received.",
+			hasThinking:     false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// A fresh parser per case; no last message is supplied.
+			parser := &DeepSeek3Parser{hasThinkingSupport: tt.hasThinking}
+			parser.Init([]api.Tool{}, nil, &api.ThinkValue{Value: tt.hasThinking})
+
+			content, thinking, calls, err := parser.Add(tt.input, true)
+			if err != nil {
+				t.Fatalf("Add() error = %v", err)
+			}
+
+			if diff := cmp.Diff(tt.expectedContent, content); diff != "" {
+				t.Errorf("Content mismatch (-want +got):\n%s", diff)
+			}
+
+			if diff := cmp.Diff(tt.expectedThinking, thinking); diff != "" {
+				t.Errorf("Thinking mismatch (-want +got):\n%s", diff)
+			}
+
+			if diff := cmp.Diff(tt.expectedCalls, calls); diff != "" {
+				t.Errorf("Tool calls mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+// TestDeepSeekParser_Streaming feeds each case to a fresh parser one chunk at
+// a time and checks that the concatenated content, thinking and tool calls
+// returned across all Add calls match the expected values.
+func TestDeepSeekParser_Streaming(t *testing.T) {
+	tests := []struct {
+		name             string
+		chunks           []string
+		expectedContent  string
+		expectedThinking string
+		expectedCalls    []api.ToolCall
+		hasThinking      bool
+	}{
+		{
+			name:            "streaming_simple_content",
+			chunks:          []string{"Hello, ", "how are ", "you?"},
+			expectedContent: "Hello, how are you?",
+			hasThinking:     false,
+		},
+		{
+			name:             "streaming_thinking",
+			chunks:           []string{"I need to ", "think about this", "...</think>", "The answer is 42."},
+			expectedThinking: "I need to think about this...",
+			expectedContent:  "The answer is 42.",
+			hasThinking:      true,
+		},
+		{
+			name:            "streaming_tool_call",
+			chunks:          []string{"I'll check weather.", "<｜tool▁calls▁begin｜>", "<｜tool▁call▁begin｜>get_weather", "<｜tool▁sep｜>{\"location\":\"Paris\"}", "<｜tool▁call▁end｜><｜tool▁calls▁end｜>"},
+			expectedContent: "I'll check weather.",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: api.ToolCallFunctionArguments{
+							"location": "Paris",
+						},
+					},
+				},
+			},
+			hasThinking: false,
+		},
+		{
+			name:             "streaming_thinking_with_partial_tag",
+			chunks:           []string{"Thinking about this", "...</", "think>", "Done thinking."},
+			expectedThinking: "Thinking about this...",
+			expectedContent:  "Done thinking.",
+			hasThinking:      true,
+		},
+		{
+			name:            "streaming_tool_output",
+			chunks:          []string{"Weather info: ", "<｜tool▁output▁begin｜>", "25°C, Sunny", "<｜tool▁output▁end｜>", " Enjoy!"},
+			expectedContent: "Weather info: 25°C, Sunny Enjoy!",
+			hasThinking:     false,
+		},
+		{
+			name:            "streaming_with_split_tags",
+			chunks:          []string{"Content before ", "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>test", "<｜tool▁sep｜>{}", "<｜tool▁call▁end｜><｜tool▁calls▁end｜>", " after"},
+			expectedContent: "Content before  after",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name:      "test",
+						Arguments: api.ToolCallFunctionArguments{},
+					},
+				},
+			},
+			hasThinking: false,
+		},
+		{
+			name:             "streaming_thinking_with_split_end_tag",
+			chunks:           []string{"Thinking content", "</th", "ink>", "Regular content"},
+			expectedThinking: "Thinking content",
+			expectedContent:  "Regular content",
+			hasThinking:      true,
+		},
+		{
+			name:            "streaming_unicode_content",
+			chunks:          []string{"مرحبا ", "بالعالم! ", "你好", "世界!"},
+			expectedContent: "مرحبا بالعالم! 你好世界!",
+			hasThinking:     false,
+		},
+		{
+			name:            "streaming_multiple_tool_outputs",
+			chunks:          []string{"Results: ", "<｜tool▁output▁begin｜>", "Paris: 22°C", "<｜tool▁output▁end｜>", " and ", "<｜tool▁output▁begin｜>", "London: 18°C", "<｜tool▁output▁end｜>"},
+			expectedContent: "Results: Paris: 22°C and London: 18°C",
+			hasThinking:     false,
+		},
+		{
+			name:            "streaming_tool_call_with_split_json",
+			chunks:          []string{"Processing.", "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>calc<｜tool▁sep｜>{\"x\":", "42,\"y\":", "24}<｜tool▁call▁end｜><｜tool▁calls▁end｜>"},
+			expectedContent: "Processing.",
+			expectedCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "calc",
+						Arguments: api.ToolCallFunctionArguments{
+							"x": float64(42),
+							"y": float64(24),
+						},
+					},
+				},
+			},
+			hasThinking: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			parser := &DeepSeek3Parser{hasThinkingSupport: tt.hasThinking}
+			parser.Init([]api.Tool{}, nil, &api.ThinkValue{Value: tt.hasThinking})
+
+			// Accumulate what each Add call returns.
+			var allContent, allThinking string
+			var allCalls []api.ToolCall
+
+			for i, chunk := range tt.chunks {
+				// done is true only for the final chunk.
+				done := i == len(tt.chunks)-1
+				content, thinking, calls, err := parser.Add(chunk, done)
+				if err != nil {
+					t.Fatalf("Add() error = %v", err)
+				}
+
+				allContent += content
+				allThinking += thinking
+				allCalls = append(allCalls, calls...)
+			}
+
+			if diff := cmp.Diff(tt.expectedContent, allContent); diff != "" {
+				t.Errorf("Content mismatch (-want +got):\n%s", diff)
+			}
+
+			if diff := cmp.Diff(tt.expectedThinking, allThinking); diff != "" {
+				t.Errorf("Thinking mismatch (-want +got):\n%s", diff)
+			}
+
+			if diff := cmp.Diff(tt.expectedCalls, allCalls); diff != "" {
+				t.Errorf("Tool calls mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+// TestDeepSeekParser_HasThinkingSupport checks that HasThinkingSupport
+// reports exactly the flag the parser was constructed with.
+func TestDeepSeekParser_HasThinkingSupport(t *testing.T) {
+	cases := []struct {
+		label   string
+		enabled bool
+		want    bool
+	}{
+		{label: "thinking_enabled", enabled: true, want: true},
+		{label: "thinking_disabled", enabled: false, want: false},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.label, func(t *testing.T) {
+			p := &DeepSeek3Parser{hasThinkingSupport: tc.enabled}
+			got := p.HasThinkingSupport()
+			if got != tc.want {
+				t.Errorf("HasThinkingSupport() = %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
+
+// TestDeepSeekParser_HasToolSupport checks that a zero-value parser reports
+// tool support.
+func TestDeepSeekParser_HasToolSupport(t *testing.T) {
+	var p DeepSeek3Parser
+	if supported := p.HasToolSupport(); !supported {
+		t.Error("HasToolSupport() should return true")
+	}
+}
+
+// TestDeepSeekParser_Init checks that Init hands back the tools it was given
+// and that a thinking-capable parser with thinking requested starts in the
+// thinking state.
+func TestDeepSeekParser_Init(t *testing.T) {
+	given := []api.Tool{
+		{Type: "function", Function: api.ToolFunction{Name: "test_tool"}},
+	}
+	p := &DeepSeek3Parser{hasThinkingSupport: true}
+
+	got := p.Init(given, nil, &api.ThinkValue{Value: true})
+
+	diff := cmp.Diff(given, got)
+	if diff != "" {
+		t.Errorf("Init() returned tools mismatch (-want +got):\n%s", diff)
+	}
+
+	// With thinking enabled and no last message, parsing starts in the thinking state.
+	if p.state != DeepSeekCollectingThinking {
+		t.Errorf("Expected initial state to be DeepSeekCollectingThinking, got %v", p.state)
+	}
+}
+
+// TestDeepSeek3Parser_parseToolCallContent calls parseToolCallContent directly
+// with the text that would sit between the tool-call begin and end tags. Cases
+// with expectError only require a non-nil error; the others compare the parsed
+// call with the expected value.
+func TestDeepSeek3Parser_parseToolCallContent(t *testing.T) {
+	tests := []struct {
+		name        string
+		content     string
+		expected    api.ToolCall
+		expectError bool
+	}{
+		{
+			name:    "valid_tool_call",
+			content: "get_weather<｜tool▁sep｜>{\"location\":\"Paris\"}",
+			expected: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "get_weather",
+					Arguments: api.ToolCallFunctionArguments{
+						"location": "Paris",
+					},
+				},
+			},
+		},
+		{
+			name:    "complex_arguments",
+			content: "process_data<｜tool▁sep｜>{\"items\":[\"a\",\"b\"],\"config\":{\"enabled\":true}}",
+			expected: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "process_data",
+					Arguments: api.ToolCallFunctionArguments{
+						"items":  []interface{}{"a", "b"},
+						"config": map[string]interface{}{"enabled": true},
+					},
+				},
+			},
+		},
+		{
+			name:    "empty_arguments",
+			content: "ping<｜tool▁sep｜>{}",
+			expected: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name:      "ping",
+					Arguments: api.ToolCallFunctionArguments{},
+				},
+			},
+		},
+		{
+			name:    "unicode_in_tool_name",
+			content: "获取天气<｜tool▁sep｜>{\"城市\":\"北京\"}",
+			expected: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "获取天气",
+					Arguments: api.ToolCallFunctionArguments{
+						"城市": "北京",
+					},
+				},
+			},
+		},
+		{
+			name:    "special_chars_in_arguments",
+			content: "execute<｜tool▁sep｜>{\"command\":\"ls && echo \\\"done\\\"\",\"path\":\"/home/user\"}",
+			expected: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "execute",
+					Arguments: api.ToolCallFunctionArguments{
+						"command": "ls && echo \"done\"",
+						"path":    "/home/user",
+					},
+				},
+			},
+		},
+		{
+			name:    "numeric_arguments",
+			content: "calculate<｜tool▁sep｜>{\"x\":3.14,\"y\":42,\"enabled\":true}",
+			expected: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "calculate",
+					Arguments: api.ToolCallFunctionArguments{
+						"x":       3.14,
+						"y":       float64(42),
+						"enabled": true,
+					},
+				},
+			},
+		},
+		{
+			name:        "invalid_format_no_separator",
+			content:     "get_weather{\"location\":\"Paris\"}",
+			expectError: true,
+		},
+		{
+			name:        "invalid_json",
+			content:     "get_weather<｜tool▁sep｜>{invalid json}",
+			expectError: true,
+		},
+		{
+			name:        "empty_tool_name",
+			content:     "<｜tool▁sep｜>{\"arg\":\"value\"}",
+			expectError: false, // This should work, just empty name
+			expected: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "",
+					Arguments: api.ToolCallFunctionArguments{
+						"arg": "value",
+					},
+				},
+			},
+		},
+		{
+			name:        "missing_json_part",
+			content:     "tool_name<｜tool▁sep｜>",
+			expectError: true,
+		},
+	}
+
+	// One parser is shared by all cases; parseToolCallContent does not read parser state.
+	parser := &DeepSeek3Parser{}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := parser.parseToolCallContent(tt.content)
+
+			if tt.expectError {
+				if err == nil {
+					t.Error("Expected error but got none")
+				}
+				return
+			}
+
+			if err != nil {
+				t.Fatalf("Unexpected error: %v", err)
+			}
+
+			if diff := cmp.Diff(tt.expected, result); diff != "" {
+				t.Errorf("parseToolCallContent() mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+// TestDeepSeekParser_EdgeCases covers unusual inputs (repeated or partial
+// tags, malformed tool calls, whitespace-only input) using a single Add call
+// per case. Only content and thinking are compared; the returned tool calls
+// are discarded.
+func TestDeepSeekParser_EdgeCases(t *testing.T) {
+	tests := []struct {
+		name             string
+		input            string
+		expectedContent  string
+		expectedThinking string
+		hasThinking      bool
+	}{
+		{
+			name:             "nested_think_tags_in_thinking",
+			input:            "Outer thinking <think>inner</think> content</think>Final content",
+			expectedThinking: "Outer thinking <think>inner",
+			expectedContent:  "content</think>Final content",
+			hasThinking:      true,
+		},
+		{
+			name:             "multiple_think_close_tags",
+			input:            "First thought</think>Second thought</think>Final content",
+			expectedThinking: "First thought",
+			expectedContent:  "Second thought</think>Final content",
+			hasThinking:      true,
+		},
+		{
+			name:             "empty_thinking_content",
+			input:            "</think>Just content",
+			expectedThinking: "",
+			expectedContent:  "Just content",
+			hasThinking:      true,
+		},
+		{
+			name:            "thinking_disabled_with_think_tags",
+			input:           "Some content</think>More content",
+			expectedContent: "Some content</think>More content",
+			hasThinking:     false,
+		},
+		{
+			name:            "malformed_tool_call_missing_sep",
+			input:           "Testing.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>bad_tool{\"arg\":\"value\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
+			expectedContent: "Testing.",
+			hasThinking:     false,
+		},
+		{
+			name:            "malformed_tool_call_invalid_json",
+			input:           "Testing.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>bad_tool<｜tool▁sep｜>{invalid json}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
+			expectedContent: "Testing.",
+			hasThinking:     false,
+		},
+		{
+			name:            "partial_tool_tag_at_end",
+			input:           "Content with partial <｜tool▁calls▁",
+			expectedContent: "Content with partial <｜tool▁calls▁",
+			hasThinking:     false,
+		},
+		{
+			name:            "partial_think_tag_at_end",
+			input:           "Thinking content</th",
+			expectedContent: "Thinking content</th",
+			hasThinking:     false,
+		},
+		{
+			name:             "partial_think_tag_at_end_with_thinking",
+			input:            "Thinking content</th",
+			expectedThinking: "Thinking content",
+			expectedContent:  "",
+			hasThinking:      true,
+		},
+		{
+			name:            "whitespace_only_content",
+			input:           "   \n\t   ",
+			expectedContent: "   \n\t   ",
+			hasThinking:     false,
+		},
+		{
+			name:            "tool_output_with_newlines",
+			input:           "Output:\n<｜tool▁output▁begin｜>Line 1\nLine 2\nLine 3<｜tool▁output▁end｜>\nDone.",
+			expectedContent: "Output:\nLine 1\nLine 2\nLine 3\nDone.",
+			hasThinking:     false,
+		},
+		{
+			name:            "consecutive_tool_calls",
+			input:           "First.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>tool1<｜tool▁sep｜>{}<｜tool▁call▁end｜><｜tool▁calls▁end｜>Second.<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>tool2<｜tool▁sep｜>{}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
+			expectedContent: "First.",
+			hasThinking:     false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			parser := &DeepSeek3Parser{hasThinkingSupport: tt.hasThinking}
+			parser.Init([]api.Tool{}, nil, &api.ThinkValue{Value: tt.hasThinking})
+
+			// Tool calls are intentionally ignored in these cases.
+			content, thinking, _, err := parser.Add(tt.input, true)
+			if err != nil {
+				t.Fatalf("Add() error = %v", err)
+			}
+
+			if diff := cmp.Diff(tt.expectedContent, content); diff != "" {
+				t.Errorf("Content mismatch (-want +got):\n%s", diff)
+			}
+
+			if diff := cmp.Diff(tt.expectedThinking, thinking); diff != "" {
+				t.Errorf("Thinking mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
--- a/model/parsers/parsers.go
+++ b/model/parsers/parsers.go
@@ -58,6 +58,8 @@ func ParserForName(name string) Parser {
 		return harmony.NewHarmonyMessageHandler()
 	case "cogito":
 		return &CogitoParser{}
+	case "deepseek3":
+		return &DeepSeek3Parser{hasThinkingSupport: true}
 	case "olmo3":
 		return &Olmo3Parser{}
 	case "olmo3-think":
--- a/model/renderers/deepseek3.go
+++ b/model/renderers/deepseek3.go
@@ -70,7 +70,16 @@ func (r *DeepSeek3Renderer) Render(messages []api.Message, tools []api.Tool, thi
 	isTool := false
 	isLastUser := false

-	for _, message := range messages {
+	// Find the index of the last user message; an assistant message after it belongs to the current turn.
+	lastUserIndex := -1
+	for i := len(messages) - 1; i >= 0; i-- {
+		if messages[i].Role == "user" {
+			lastUserIndex = i
+			break
+		}
+	}
+
+	for i, message := range messages {
 		switch message.Role {
 		case "user":
 			isTool = false
@@ -101,9 +110,11 @@ func (r *DeepSeek3Renderer) Render(messages []api.Message, tools []api.Tool, thi
 			} else {
 				if isLastUser {
 					sb.WriteString("<｜Assistant｜>")
-					// message["prefix"] is defined and message["prefix"] and thinking
-					// message.Thinking != "" represents message["prefix"] being defined
-					if message.Thinking != "" && thinking {
+					hasThinking := message.Thinking != ""
+
+					// Only open <think> for the current turn (after the last user message); earlier turns always get the closing </think>.
+					isCurrentTurn := i > lastUserIndex
+					if hasThinking && thinking && isCurrentTurn {
 						sb.WriteString("<think>")
 					} else {
 						sb.WriteString("</think>")
--- a/model/renderers/deepseek3_test.go
+++ b/model/renderers/deepseek3_test.go
@@ -422,7 +422,7 @@ Second instruction<｜User｜>Hello<｜Assistant｜></think>`,
 				{Role: "user", Content: "How do they interact with matter?"},
 			},
 			thinkValue: &api.ThinkValue{Value: true},
-			expected:   `<｜begin▁of▁sentence｜><｜User｜>Explain quantum physics<｜Assistant｜><think>Quantum physics is the study of matter and energy at the smallest scales.<｜end▁of▁sentence｜><｜User｜>What about photons?<｜Assistant｜></think>Photons are particles of light with no mass.<｜end▁of▁sentence｜><｜User｜>How do they interact with matter?<｜Assistant｜><think>`,
+			expected:   `<｜begin▁of▁sentence｜><｜User｜>Explain quantum physics<｜Assistant｜></think>Quantum physics is the study of matter and energy at the smallest scales.<｜end▁of▁sentence｜><｜User｜>What about photons?<｜Assistant｜></think>Photons are particles of light with no mass.<｜end▁of▁sentence｜><｜User｜>How do they interact with matter?<｜Assistant｜><think>`,
 		},
 		{
 			name: "tool call with thinking content in response",
@@ -959,6 +959,73 @@ Where:
 - For multiple tool calls, chain them directly without separators or spaces
 <｜User｜>What's the weather?<｜Assistant｜></think>`,
 		},
+		{
+			name: "multi-turn conversation with thinking content on each turn",
+			messages: []api.Message{
+				{Role: "user", Content: "hey!"},
+				{
+					Role:     "assistant",
+					Content:  "Hey! 😊 How's it going? What's on your mind today?",
+					Thinking: "Hmm, the user just said \"hey!\" which is a simple greeting. This is a straightforward opening where they're likely just starting a conversation or testing the interaction.",
+				},
+				{Role: "user", Content: "fantastic, how has yours been"},
+				{
+					Role:     "assistant",
+					Content:  "Glad to hear you're having a fantastic day! That's awesome.\n\nMine's been great, thanks for asking! Just buzzing along, helping people out and having conversations like this one. So what's making your day so fantastic? Anything fun happening?",
+					Thinking: "Ah, the user is responding warmly and asking about my \"day.\" Since I'm an AI, I need to gently remind them I don't experience time like a human, but frame it positively to keep the conversation flowing.",
+				},
+				{Role: "user", Content: "awesome, can you tell me a 10 word story?"},
+			},
+			thinkValue: &api.ThinkValue{Value: true},
+			expected: `<｜begin▁of▁sentence｜><｜User｜>hey!<｜Assistant｜></think>Hey! 😊 How's it going? What's on your mind today?<｜end▁of▁sentence｜><｜User｜>fantastic, how has yours been<｜Assistant｜></think>Glad to hear you're having a fantastic day! That's awesome.
+
+Mine's been great, thanks for asking! Just buzzing along, helping people out and having conversations like this one. So what's making your day so fantastic? Anything fun happening?<｜end▁of▁sentence｜><｜User｜>awesome, can you tell me a 10 word story?<｜Assistant｜><think>`,
+		},
+		{
+			name: "vLLM documentation example - multi-turn with full thinking content",
+			messages: []api.Message{
+				{Role: "system", Content: "You are a helpful assistant"},
+				{Role: "user", Content: "Who are you?"},
+				{
+					Role:     "assistant",
+					Content:  "I am DeepSeek",
+					Thinking: "Hmm",
+				},
+				{Role: "user", Content: "9.11 and 9.8, which is greater?"},
+				{
+					Role:     "assistant",
+					Content:  "9.8 is greater than 9.11.\n\nTo compare them easily, you can align the decimal places:  \n- 9.11 has a tenths digit of 1 and a hundredths digit of 1.  \n- 9.8 can be written as 9.80, which has a tenths digit of 8 and a hundredths digit of 0.  \n\nSince the whole number part (9) is the same, compare the tenths place: 8 is greater than 1, so 9.80 (or 9.8) is greater than 9.11.",
+					Thinking: "First, the user is asking which number is greater between 9.11 and 9.8. These are decimal numbers.\n\nI need to compare 9.11 and 9.8. To make it easier, I should think of them as decimals with the same number of decimal places.\n\n9.11 has two decimal places, and 9.8 has one decimal place. I can write 9.8 as 9.80 to make it comparable.\n\nSo, 9.11 versus 9.80.\n\nNow, comparing the whole number parts: both have 9, so they are equal in the units place.\n\nNext, compare the tenths place: for 9.11, the tenths digit is 1. For 9.80, the tenths digit is 8. Since 8 is greater than 1, 9.80 is greater than 9.11.\n\nTherefore, 9.8 is greater than 9.11.\n\nI can also think of them as fractions: 9.11 is 911/100, and 9.8 is 98/10 or 980/100. Comparing 911/100 and 980/100, 980/100 is larger, so 9.8 is greater.\n\nSo, the answer should be that 9.8 is greater than 9.11.\n\nNow, I need to respond helpfully. Since the user might be learning or need clarification, I should explain briefly.\n\nFinally, my response should be in English, as the query is in English.",
+				},
+				{Role: "user", Content: "Thanks! Can you tell me a 10 word story?"},
+				{
+					Role:     "assistant",
+					Content:  "Of course. Here is a 10-word story:\n\nHe found the key, unlocking a door to forgotten memories.",
+					Thinking: "Hmm, the user just asked for a 10-word story after the previous number comparison question. This is a quick creative task with a strict word count constraint. \n\nThe story needs to be exactly 10 words while maintaining coherence and a hint of narrative. A micro-story about finding a key could work - it implies a larger unseen story. \n\nChecking the word count: \"He found the key, unlocking a door to forgotten memories.\" That's 10 words with a beginning, middle and implied end. It fits the requirement while leaving room for imagination.",
+				},
+				{Role: "user", Content: "That was beautiful! Now can you write a haiku?"},
+			},
+			thinkValue: &api.ThinkValue{Value: true},
+			expected: `<｜begin▁of▁sentence｜>You are a helpful assistant<｜User｜>Who are you?<｜Assistant｜></think>I am DeepSeek<｜end▁of▁sentence｜><｜User｜>9.11 and 9.8, which is greater?<｜Assistant｜></think>9.8 is greater than 9.11.
+
+To compare them easily, you can align the decimal places:  
+- 9.11 has a tenths digit of 1 and a hundredths digit of 1.  
+- 9.8 can be written as 9.80, which has a tenths digit of 8 and a hundredths digit of 0.  
+
+Since the whole number part (9) is the same, compare the tenths place: 8 is greater than 1, so 9.80 (or 9.8) is greater than 9.11.<｜end▁of▁sentence｜><｜User｜>Thanks! Can you tell me a 10 word story?<｜Assistant｜></think>Of course. Here is a 10-word story:
+
+He found the key, unlocking a door to forgotten memories.<｜end▁of▁sentence｜><｜User｜>That was beautiful! Now can you write a haiku?<｜Assistant｜><think>`,
+		},
+		{
+			name: "no system prompt - content with embedded thinking tags",
+			messages: []api.Message{
+				{Role: "user", Content: "Who are you?"},
+				{Role: "assistant", Content: "<think>Hmm</think>I am DeepSeek"},
+				{Role: "user", Content: "Thanks! Can you tell me a 10 word story?"},
+			},
+			thinkValue: &api.ThinkValue{Value: true},
+			expected:   `<｜begin▁of▁sentence｜><｜User｜>Who are you?<｜Assistant｜></think>I am DeepSeek<｜end▁of▁sentence｜><｜User｜>Thanks! Can you tell me a 10 word story?<｜Assistant｜><think>`,
+		},
 	}

 	renderer := &DeepSeek3Renderer{IsThinking: true, Variant: Deepseek31}
--- a/model/renderers/renderer.go
+++ b/model/renderers/renderer.go
@@ -59,7 +59,7 @@ func rendererForName(name string) Renderer {
 	case "cogito":
 		renderer := &CogitoRenderer{isThinking: true}
 		return renderer
-	case "deepseek-v3.1":
+	case "deepseek3.1":
 		renderer := &DeepSeek3Renderer{IsThinking: true, Variant: Deepseek31}
 		return renderer
 	case "olmo3":