From 90d429f5a8151f3e26ef5003810707b82a4572d3 Mon Sep 17 00:00:00 2001
From: Patrick Devine
Date: Wed, 8 Oct 2025 16:50:13 -0700
Subject: [PATCH] thinking: turn on thinking mode for all reasoning models (#12533)

---
 api/types.go                   |  2 +-
 openai/openai.go               | 17 +++++++++++------
 server/routes.go               | 24 ++++++++++++++++++------
 server/routes_generate_test.go |  7 -------
 4 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/api/types.go b/api/types.go
index 8cc7752c..d0669b90 100644
--- a/api/types.go
+++ b/api/types.go
@@ -936,7 +936,7 @@ func (t *ThinkValue) UnmarshalJSON(data []byte) error {
 		return nil
 	}
 
-	return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\")")
+	return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\", true, or false)")
 }
 
 // MarshalJSON implements json.Marshaler
diff --git a/openai/openai.go b/openai/openai.go
index 55e55e97..a9169445 100644
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -9,6 +9,7 @@ import (
 	"log/slog"
 	"math/rand"
 	"net/http"
+	"slices"
 	"strings"
 	"time"
 
@@ -82,7 +83,7 @@ type StreamOptions struct {
 }
 
 type Reasoning struct {
-	Effort *string `json:"effort,omitempty"`
+	Effort string `json:"effort,omitempty"`
 }
 
 type ChatCompletionRequest struct {
@@ -567,13 +568,17 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 
 	var think *api.ThinkValue
 	if r.Reasoning != nil {
-		think = &api.ThinkValue{
-			Value: *r.Reasoning.Effort,
+		if !slices.Contains([]string{"high", "medium", "low", "none"}, r.Reasoning.Effort) {
+			return nil, fmt.Errorf("invalid reasoning value: '%s' (must be \"high\", \"medium\", \"low\", or \"none\")", r.Reasoning.Effort)
 		}
+
+		if r.Reasoning.Effort == "none" {
+			think = &api.ThinkValue{Value: false}
+		} else {
+			think = &api.ThinkValue{Value: r.Reasoning.Effort}
+		}
 	} else if r.ReasoningEffort != nil {
-		think = &api.ThinkValue{
-			Value: *r.ReasoningEffort,
-		}
+		think = &api.ThinkValue{Value: *r.ReasoningEffort}
 	}
 
 	return &api.ChatRequest{
diff --git a/server/routes.go b/server/routes.go
index c7052f1b..ec7ceba3 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -330,12 +330,16 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	if req.Suffix != "" {
 		caps = append(caps, model.CapabilityInsert)
 	}
-	if req.Think != nil && req.Think.Bool() {
+
+	modelCaps := m.Capabilities()
+	if req.Think != nil {
 		caps = append(caps, model.CapabilityThinking)
-		// TODO(drifkin): consider adding a warning if it's false and the model
-		// doesn't support thinking. It's not strictly required, but it can be a
-		// hint that the user is on an older qwen3/r1 model that doesn't have an
-		// updated template supporting thinking
+	} else {
+		// add thinking if the model supports it
+		if slices.Contains(modelCaps, model.CapabilityThinking) {
+			caps = append(caps, model.CapabilityThinking)
+			req.Think = &api.ThinkValue{Value: true}
+		}
 	}
 
 	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
@@ -1871,8 +1875,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	if len(req.Tools) > 0 {
 		caps = append(caps, model.CapabilityTools)
 	}
-	if req.Think != nil && req.Think.Bool() {
+
+	modelCaps := m.Capabilities()
+	if req.Think != nil {
 		caps = append(caps, model.CapabilityThinking)
+	} else {
+		// add thinking if the model supports it
+		if slices.Contains(modelCaps, model.CapabilityThinking) {
+			caps = append(caps, model.CapabilityThinking)
+			req.Think = &api.ThinkValue{Value: true}
+		}
 	}
 
 	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
diff --git a/server/routes_generate_test.go b/server/routes_generate_test.go
index 8385cb17..9f5d6ad1 100644
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -1120,13 +1120,6 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
 		"The answer is 4.",
 		true)
 
-	testChatRequest(t, "thinking disabled but template still adds think tag",
-		"Simple question",
-		" <think>My thoughts</think> The answer.",
-		"",
-		" <think>My thoughts</think> The answer.",
-		false)
-
 	// Test streaming response with template-added <think>
 	t.Run("streaming with thinking", func(t *testing.T) {
 		var wg sync.WaitGroup
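
A minimal usage sketch follows (not part of the patch), assuming the github.com/ollama/ollama/api Go client and a locally running server; the model name "qwen3" and the prompt are placeholders. With this change, leaving Think unset enables thinking for any model that reports the thinking capability, and a client opts out by sending think=false (or, on the OpenAI-compatible endpoint, reasoning effort "none").

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// Think is left nil here: with this patch the server turns thinking on
	// automatically when the model reports the thinking capability.
	req := &api.ChatRequest{
		Model:    "qwen3", // placeholder; any thinking-capable model
		Messages: []api.Message{{Role: "user", Content: "What is 2+2?"}},
	}

	// To opt out explicitly, a client would instead send think=false:
	// req.Think = &api.ThinkValue{Value: false}

	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		if resp.Message.Thinking != "" {
			fmt.Print(resp.Message.Thinking)
		}
		fmt.Print(resp.Message.Content)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}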