thinking: turn on thinking mode for all reasoning models (#12533)

2025-12-21 14:26:30 +00:00 · 2025-10-08 16:50:13 -07:00
parent 1fc35f1260
commit 90d429f5a8
4 changed files with 30 additions and 20 deletions
--- a/api/types.go
+++ b/api/types.go
@@ -936,7 +936,7 @@ func (t *ThinkValue) UnmarshalJSON(data []byte) error {
 		return nil
 	}
-	return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\")")
+	return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\", true, or false)")
 }
 // MarshalJSON implements json.Marshaler
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -9,6 +9,7 @@ import (
 	"log/slog"
 	"math/rand"
 	"net/http"
 	"slices"
 	"strings"
 	"time"
@@ -82,7 +83,7 @@ type StreamOptions struct {
 }
 type Reasoning struct {
-	Effort *string `json:"effort,omitempty"`
+	Effort string `json:"effort,omitempty"`
 }
 type ChatCompletionRequest struct {
@@ -567,13 +568,17 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 	var think *api.ThinkValue
 	if r.Reasoning != nil {
-		think = &api.ThinkValue{
+		if !slices.Contains([]string{"high", "medium", "low", "none"}, r.Reasoning.Effort) {
-			Value: *r.Reasoning.Effort,
+			return nil, fmt.Errorf("invalid reasoning value: '%s' (must be \"high\", \"medium\", \"low\", or \"none\")", r.Reasoning.Effort)
 		}
 		if r.Reasoning.Effort == "none" {
 			think = &api.ThinkValue{Value: false}
 		} else {
 			think = &api.ThinkValue{Value: r.Reasoning.Effort}
 		}
 	} else if r.ReasoningEffort != nil {
-		think = &api.ThinkValue{
+		think = &api.ThinkValue{Value: *r.ReasoningEffort}
 			Value: *r.ReasoningEffort,
 		}
 	}
 	return &api.ChatRequest{
--- a/server/routes.go
+++ b/server/routes.go
@@ -330,12 +330,16 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	if req.Suffix != "" {
 		caps = append(caps, model.CapabilityInsert)
 	}
-	if req.Think != nil && req.Think.Bool() {
+
 	modelCaps := m.Capabilities()
 	if req.Think != nil {
 		caps = append(caps, model.CapabilityThinking)
-		// TODO(drifkin): consider adding a warning if it's false and the model
+	} else {
-		// doesn't support thinking. It's not strictly required, but it can be a
+		// add thinking if the model supports it
-		// hint that the user is on an older qwen3/r1 model that doesn't have an
+		if slices.Contains(modelCaps, model.CapabilityThinking) {
-		// updated template supporting thinking
+			caps = append(caps, model.CapabilityThinking)
 			req.Think = &api.ThinkValue{Value: true}
 		}
 	}
 	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
@@ -1871,8 +1875,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	if len(req.Tools) > 0 {
 		caps = append(caps, model.CapabilityTools)
 	}
-	if req.Think != nil && req.Think.Bool() {
+
 	modelCaps := m.Capabilities()
 	if req.Think != nil {
 		caps = append(caps, model.CapabilityThinking)
 	} else {
 		// add thinking if the model supports it
 		if slices.Contains(modelCaps, model.CapabilityThinking) {
 			caps = append(caps, model.CapabilityThinking)
 			req.Think = &api.ThinkValue{Value: true}
 		}
 	}
 	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -1120,13 +1120,6 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
 		"The answer is 4.",
 		true)
 	testChatRequest(t, "thinking disabled but template still adds think tag",
 		"Simple question",
 		" My thoughts </think> The answer.",
 		"",
 		" My thoughts </think> The answer.",
 		false)
 	// Test streaming response with template-added <think>
 	t.Run("streaming with thinking", func(t *testing.T) {
 		var wg sync.WaitGroup