From 90d429f5a8151f3e26ef5003810707b82a4572d3 Mon Sep 17 00:00:00 2001
From: Patrick Devine
Date: Wed, 8 Oct 2025 16:50:13 -0700
Subject: [PATCH] thinking: turn on thinking mode for all reasoning models (#12533)

---
 api/types.go                   |  2 +-
 openai/openai.go               | 17 +++++++++++------
 server/routes.go               | 24 ++++++++++++++++++------
 server/routes_generate_test.go |  7 -------
 4 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/api/types.go b/api/types.go
index 8cc7752c..d0669b90 100644
--- a/api/types.go
+++ b/api/types.go
@@ -936,7 +936,7 @@ func (t *ThinkValue) UnmarshalJSON(data []byte) error {
 		return nil
 	}
 
-	return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\")")
+	return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\", true, or false)")
 }
 
 // MarshalJSON implements json.Marshaler
diff --git a/openai/openai.go b/openai/openai.go
index 55e55e97..a9169445 100644
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -9,6 +9,7 @@ import (
 	"log/slog"
 	"math/rand"
 	"net/http"
+	"slices"
 	"strings"
 	"time"
 
@@ -82,7 +83,7 @@ type StreamOptions struct {
 }
 
 type Reasoning struct {
-	Effort *string `json:"effort,omitempty"`
+	Effort string `json:"effort,omitempty"`
 }
 
 type ChatCompletionRequest struct {
@@ -567,13 +568,17 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 
 	var think *api.ThinkValue
 	if r.Reasoning != nil {
-		think = &api.ThinkValue{
-			Value: *r.Reasoning.Effort,
+		if !slices.Contains([]string{"high", "medium", "low", "none"}, r.Reasoning.Effort) {
+			return nil, fmt.Errorf("invalid reasoning value: '%s' (must be \"high\", \"medium\", \"low\", or \"none\")", r.Reasoning.Effort)
 		}
+
+		if r.Reasoning.Effort == "none" {
+			think = &api.ThinkValue{Value: false}
+		} else {
+			think = &api.ThinkValue{Value: r.Reasoning.Effort}
+		}
 	} else if r.ReasoningEffort != nil {
-		think = &api.ThinkValue{
-			Value: *r.ReasoningEffort,
-		}
+		think = &api.ThinkValue{Value: *r.ReasoningEffort}
 	}
 
 	return &api.ChatRequest{
diff --git a/server/routes.go b/server/routes.go
index c7052f1b..ec7ceba3 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -330,12 +330,16 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	if req.Suffix != "" {
 		caps = append(caps, model.CapabilityInsert)
 	}
-	if req.Think != nil && req.Think.Bool() {
+
+	modelCaps := m.Capabilities()
+	if req.Think != nil {
 		caps = append(caps, model.CapabilityThinking)
-		// TODO(drifkin): consider adding a warning if it's false and the model
-		// doesn't support thinking. It's not strictly required, but it can be a
-		// hint that the user is on an older qwen3/r1 model that doesn't have an
-		// updated template supporting thinking
+	} else {
+		// add thinking if the model supports it
+		if slices.Contains(modelCaps, model.CapabilityThinking) {
+			caps = append(caps, model.CapabilityThinking)
+			req.Think = &api.ThinkValue{Value: true}
+		}
 	}
 
 	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
@@ -1871,8 +1875,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	if len(req.Tools) > 0 {
 		caps = append(caps, model.CapabilityTools)
 	}
-	if req.Think != nil && req.Think.Bool() {
+
+	modelCaps := m.Capabilities()
+	if req.Think != nil {
 		caps = append(caps, model.CapabilityThinking)
+	} else {
+		// add thinking if the model supports it
+		if slices.Contains(modelCaps, model.CapabilityThinking) {
+			caps = append(caps, model.CapabilityThinking)
+			req.Think = &api.ThinkValue{Value: true}
+		}
 	}
 
 	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
diff --git a/server/routes_generate_test.go b/server/routes_generate_test.go
index 8385cb17..9f5d6ad1 100644
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -1120,13 +1120,6 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
 		"The answer is 4.",
 		true)
 
-	testChatRequest(t, "thinking disabled but template still adds think tag",
-		"Simple question",
-		" <think>My thoughts</think> The answer.",
-		"",
-		" <think>My thoughts</think> The answer.",
-		false)
-
 	// Test streaming response with template-added <think>
 	t.Run("streaming with thinking", func(t *testing.T) {
 		var wg sync.WaitGroup
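
A minimal usage sketch follows (not part of the patch), assuming the github.com/ollama/ollama/api Go client and a locally running server; the model name "qwen3" and the prompt are placeholders. With this change, leaving Think unset enables thinking for any model that reports the thinking capability, and a client opts out by sending think=false (or, on the OpenAI-compatible endpoint, reasoning effort "none").

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// Think is left nil here: with this patch the server turns thinking on
	// automatically when the model reports the thinking capability.
	req := &api.ChatRequest{
		Model:    "qwen3", // placeholder; any thinking-capable model
		Messages: []api.Message{{Role: "user", Content: "What is 2+2?"}},
	}

	// To opt out explicitly, a client would instead send think=false:
	// req.Think = &api.ThinkValue{Value: false}

	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		if resp.Message.Thinking != "" {
			fmt.Print(resp.Message.Thinking)
		}
		fmt.Print(resp.Message.Content)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}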