mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 14:26:30 +00:00
thinking: turn on thinking mode for all reasoning models (#12533)
This commit is contained in:
@@ -936,7 +936,7 @@ func (t *ThinkValue) UnmarshalJSON(data []byte) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\")")
|
return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\", true, or false)")
|
||||||
}
|
}
|
||||||
|
|
||||||
// MarshalJSON implements json.Marshaler
|
// MarshalJSON implements json.Marshaler
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -82,7 +83,7 @@ type StreamOptions struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Reasoning struct {
|
type Reasoning struct {
|
||||||
Effort *string `json:"effort,omitempty"`
|
Effort string `json:"effort,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ChatCompletionRequest struct {
|
type ChatCompletionRequest struct {
|
||||||
@@ -567,13 +568,17 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
|||||||
|
|
||||||
var think *api.ThinkValue
|
var think *api.ThinkValue
|
||||||
if r.Reasoning != nil {
|
if r.Reasoning != nil {
|
||||||
think = &api.ThinkValue{
|
if !slices.Contains([]string{"high", "medium", "low", "none"}, r.Reasoning.Effort) {
|
||||||
Value: *r.Reasoning.Effort,
|
return nil, fmt.Errorf("invalid reasoning value: '%s' (must be \"high\", \"medium\", \"low\", or \"none\")", r.Reasoning.Effort)
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.Reasoning.Effort == "none" {
|
||||||
|
think = &api.ThinkValue{Value: false}
|
||||||
|
} else {
|
||||||
|
think = &api.ThinkValue{Value: r.Reasoning.Effort}
|
||||||
}
|
}
|
||||||
} else if r.ReasoningEffort != nil {
|
} else if r.ReasoningEffort != nil {
|
||||||
think = &api.ThinkValue{
|
think = &api.ThinkValue{Value: *r.ReasoningEffort}
|
||||||
Value: *r.ReasoningEffort,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return &api.ChatRequest{
|
return &api.ChatRequest{
|
||||||
|
|||||||
@@ -330,12 +330,16 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
if req.Suffix != "" {
|
if req.Suffix != "" {
|
||||||
caps = append(caps, model.CapabilityInsert)
|
caps = append(caps, model.CapabilityInsert)
|
||||||
}
|
}
|
||||||
if req.Think != nil && req.Think.Bool() {
|
|
||||||
|
modelCaps := m.Capabilities()
|
||||||
|
if req.Think != nil {
|
||||||
caps = append(caps, model.CapabilityThinking)
|
caps = append(caps, model.CapabilityThinking)
|
||||||
// TODO(drifkin): consider adding a warning if it's false and the model
|
} else {
|
||||||
// doesn't support thinking. It's not strictly required, but it can be a
|
// add thinking if the model supports it
|
||||||
// hint that the user is on an older qwen3/r1 model that doesn't have an
|
if slices.Contains(modelCaps, model.CapabilityThinking) {
|
||||||
// updated template supporting thinking
|
caps = append(caps, model.CapabilityThinking)
|
||||||
|
req.Think = &api.ThinkValue{Value: true}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
|
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
|
||||||
@@ -1871,8 +1875,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
if len(req.Tools) > 0 {
|
if len(req.Tools) > 0 {
|
||||||
caps = append(caps, model.CapabilityTools)
|
caps = append(caps, model.CapabilityTools)
|
||||||
}
|
}
|
||||||
if req.Think != nil && req.Think.Bool() {
|
|
||||||
|
modelCaps := m.Capabilities()
|
||||||
|
if req.Think != nil {
|
||||||
caps = append(caps, model.CapabilityThinking)
|
caps = append(caps, model.CapabilityThinking)
|
||||||
|
} else {
|
||||||
|
// add thinking if the model supports it
|
||||||
|
if slices.Contains(modelCaps, model.CapabilityThinking) {
|
||||||
|
caps = append(caps, model.CapabilityThinking)
|
||||||
|
req.Think = &api.ThinkValue{Value: true}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
|
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
|
||||||
|
|||||||
@@ -1120,13 +1120,6 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
|
|||||||
"The answer is 4.",
|
"The answer is 4.",
|
||||||
true)
|
true)
|
||||||
|
|
||||||
testChatRequest(t, "thinking disabled but template still adds think tag",
|
|
||||||
"Simple question",
|
|
||||||
" My thoughts </think> The answer.",
|
|
||||||
"",
|
|
||||||
" My thoughts </think> The answer.",
|
|
||||||
false)
|
|
||||||
|
|
||||||
// Test streaming response with template-added <think>
|
// Test streaming response with template-added <think>
|
||||||
t.Run("streaming with thinking", func(t *testing.T) {
|
t.Run("streaming with thinking", func(t *testing.T) {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|||||||
Reference in New Issue
Block a user