Compare commits

..

37 Commits

Author SHA1 Message Date
likelovewant
71a4057fcf Merge branch 'ollama:main' into main 2025-06-19 21:11:00 +08:00
likelovewant
5ab7422508 add 2025-06-19 21:05:38 +08:00
Jeffrey Morgan
8bcb3125c1 benchmark: remove unused benchmark test (#11120)
Removes a test under benchmark/ that is unused
2025-06-18 12:58:50 -07:00
Jeffrey Morgan
6baf1e31e2 Revert "Revert "ggml: Export GPU UUIDs" (#11115)" (#11117)
Reverts PR #11115. The original change was mistakingly reverted instead of #10822
2025-06-18 07:30:49 -07:00
Jeffrey Morgan
ed567ef43b Revert "ggml: Export GPU UUIDs" (#11115)
This reverts commit aaa7818000.
2025-06-18 05:45:00 -07:00
Jeffrey Morgan
a6e64fbdf2 Revert "feat: incremental gguf parser (#10822)" (#11114)
This reverts commit 6b04cad7e8.
2025-06-18 05:42:44 -07:00
曹家巧
60cfa2a203 cache: fix comment function name in cache.go (#11110) 2025-06-18 05:21:45 -07:00
Jeffrey Morgan
55bbf3b4a1 tools: return empty arguments object instead of null (#11113) 2025-06-18 05:20:43 -07:00
Jeffrey Morgan
6bda1d2479 tools: fix parsing tool calls without any parameters (#11101)
Fixes issue where tool calls that don't expect any parameters were
not being parsed. This also fixes two additional issues: one where
2+ tool calls would not be correctly parsed, and cases where tool calls
with invalid parameters would still get parsed
2025-06-17 10:51:43 -07:00
likelovewant
50f2219dd6 Merge branch 'ollama:main' into main 2025-06-18 00:20:43 +08:00
Jeffrey Morgan
9e125d884c model: treat 'user defined' tokens as special tokens (#11077) 2025-06-16 16:03:16 -07:00
Michael Yang
a6fbfc880c gguf: fix write order (#11068)
* ggml: test write gguf order
* ggml: fix write tensor order
2025-06-16 10:42:32 -07:00
NGC13009
502028968d readme: add ollama-launcher to community integrations (#11080) 2025-06-15 21:27:49 -07:00
Phil
5a8eb0e151 readme: add GPTranslate to community integrations (#11071) 2025-06-14 08:54:03 -07:00
Jeffrey Morgan
9f8a18ec05 tools: loosen tool parsing to allow for more formats (#11030) 2025-06-12 14:18:54 -07:00
Michael Yang
6b04cad7e8 feat: incremental gguf parser (#10822)
* incremental gguf parser
* gguf: update test to not rely on gguf on disc
* re-use existing create gguf
* read capabilities from gguf kv
* kv exists
* update tests
* s/doneFunc/successFunc/g
* new buffered reader

---------

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2025-06-12 11:04:11 -07:00
Michael Yang
45f56355d5 feat: uneven splits (#11048)
The current splitDim function only operates on tensors that are split evenly which isn't always the case, e.g. a QKV tensor. This change allows the function to be used for arbitrary splits
2025-06-11 12:10:54 -07:00
Michael Yang
0dabb4ef6a skip tokenizer.model if possible (#11050)
if tokenizer.json is already copied, skip tokenizer.model
2025-06-11 12:10:35 -07:00
Michael Yang
2e77aa1ae7 use nn.Linear in place of ml.Tensor (#11049)
while nn.Linear.Forward isn't applicable for sparse MLP, it's still
a nice container for the tensors
2025-06-11 12:10:15 -07:00
Attogram Project
deaabe292d readme: add ollama-multirun to community integrations (#11038) 2025-06-10 14:14:51 -07:00
Jeffrey Morgan
af21a5ac39 readme: update quickstart link text to Gemma 3 2025-06-10 09:34:23 -07:00
Jeffrey Morgan
f63d7f68eb readme: update quickstart example to Gemma 3 2025-06-10 09:33:54 -07:00
Daniel Hiltgen
82ad1dbc07 mac: handle "keep" named apps (#11031)
When a user elects to keep the existing app, the
new Ollama is named `Ollama 2.app`
This fixes the app startup flow to handle this naming pattern.
2025-06-09 16:29:57 -07:00
Daniel Hiltgen
feeabdadd2 spawn desktop quickly (#11011)
Give the desktop app a hint to start fast.
2025-06-08 09:34:52 -07:00
Krzysztof Jeziorny
fc0309615e docs: update link to AMD drivers in linux.md (#10973) 2025-06-06 23:30:04 -04:00
Jeffrey Morgan
09d308d6b6 Revert "server: add model capabilities to the list endpoint (#10174)" (#11004)
This reverts commit 0943001193.
2025-06-06 23:29:14 -04:00
Daniel Hiltgen
a8ed68bd93 launch app hidden (#10962)
When starting the app in the background, start it hidden.
2025-06-06 14:06:29 -07:00
Daniel Hiltgen
2ae65ae471 win: handle more than 2048 processes (#10997)
Fix an array out of bounds crash
2025-06-06 14:06:09 -07:00
Devon Rifkin
a3b6886b7d move thinking logic into its own package (#10990)
move thinking logic into its own package
2025-06-06 12:02:20 -07:00
Hunter Wittenborn
c6a6d7294d docs: fix typo in development.md (#10998) 2025-06-06 12:07:29 -04:00
Devon Rifkin
2cf007c9d1 Merge pull request #10987 from ollama/drifkin/export-thinking-parser
export ThinkingParser
2025-06-05 12:19:14 -07:00
Devon Rifkin
0683efa637 export ThinkingParser 2025-06-05 10:22:32 -07:00
JasonHonKL
0943001193 server: add model capabilities to the list endpoint (#10174) 2025-06-04 11:39:48 -07:00
HardCodeDev
5c42800fca readme: add SimpleOllamaUnity to community integrations (#10817) 2025-05-30 19:50:16 -07:00
Parth Sareen
65f10c2823 tools: resiliency upgrade to name and arg extraction from template (#10917) 2025-05-30 15:18:09 -07:00
Jesse Gross
aaa7818000 ggml: Export GPU UUIDs
This enables matching up devices and information reported by the backend
with system management libraries such as nvml to get accurate free
memory reporting.
2025-05-29 14:01:26 -07:00
Jesse Gross
f15ffc4320 llm: Make "POST predict" error message more informative
"POST predict" basically means that the runner has crashed, which
can have many reasons. However, many people think this is a specific
error and either report only this message or group together unrelated
bugs. This replaces it with a more friendly and helpful message.
2025-05-29 09:41:19 -07:00
58 changed files with 2446 additions and 2802 deletions

View File

@@ -58,7 +58,7 @@
"name": "ROCm 6",
"inherits": [ "ROCm" ],
"cacheVariables": {
"AMDGPU_TARGETS": "gfx803;gfx902;gfx1030;gfx1031;gfx1032;gfx1034;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1201;gfx900:xnack-;gfx906:xnack-;gfx90c:xnack-;gfx1010:xnack-;gfx1011:xnack-;gfx1012:xnack-;"
"AMDGPU_TARGETS": "gfx803;gfx902;gfx1030;gfx1031;gfx1032;gfx1034;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1200;gfx1201;gfx900:xnack-;gfx906:xnack-;gfx90c:xnack-;gfx1010:xnack-;gfx1011:xnack-;gfx1012:xnack-;"
}
}
],

View File

@@ -62,10 +62,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla
## Quickstart
To run and chat with [Llama 3.2](https://ollama.com/library/llama3.2):
To run and chat with [Gemma 3](https://ollama.com/library/gemma3):
```shell
ollama run llama3.2
ollama run gemma3
```
## Model library
@@ -429,6 +429,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Lumina](https://github.com/cushydigit/lumina.git) (A lightweight, minimal React.js frontend for interacting with Ollama servers)
- [Tiny Notepad](https://pypi.org/project/tiny-notepad) (A lightweight, notepad-like interface to chat with ollama available on PyPI)
- [macLlama (macOS native)](https://github.com/hellotunamayo/macLlama) (A native macOS GUI application for interacting with Ollama models, featuring a chat interface.)
- [GPTranslate](https://github.com/philberndt/GPTranslate) (A fast and lightweight, AI powered desktop translation application written with Rust and Tauri. Features real-time translation with OpenAI/Azure/Ollama.)
- [ollama launcher](https://github.com/NGC13009/ollama-launcher) (A launcher for Ollama, aiming to provide users with convenient functions such as ollama server launching, management, or configuration.)
### Cloud
@@ -473,6 +475,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [orca-cli](https://github.com/molbal/orca-cli) Ollama Registry CLI Application - Browse, pull, and download models from Ollama Registry in your terminal.
- [GGUF-to-Ollama](https://github.com/jonathanhecl/gguf-to-ollama) - Importing GGUF to Ollama made easy (multiplatform)
- [AWS-Strands-With-Ollama](https://github.com/rapidarchitect/ollama_strands) - AWS Strands Agents with Ollama Examples
- [ollama-multirun](https://github.com/attogram/ollama-multirun) - A bash shell script to run a single prompt against any or all of your locally installed ollama models, saving the output and performance statistics as easily navigable web pages. ([Demo](https://attogram.github.io/ai_test_zone/))
### Apple Vision Pro
@@ -609,6 +612,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
- [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (telegram bot, primary for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration e.t.c)
- [mcp-llm](https://github.com/sammcj/mcp-llm) (MCP Server to allow LLMs to call other LLMs)
- [SimpleOllamaUnity](https://github.com/HardCodeDev777/SimpleOllamaUnity) (Unity Engine extension for communicating with Ollama in a few lines of code. Also works at runtime)
- [UnityCodeLama](https://github.com/HardCodeDev777/UnityCodeLama) (Unity Edtior tool to analyze scripts via Ollama)
### Supported backends

View File

@@ -1,178 +0,0 @@
package benchmark
import (
"context"
"flag"
"fmt"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// Command line flags
var modelFlag string
func init() {
flag.StringVar(&modelFlag, "m", "", "Name of the model to benchmark")
flag.Lookup("m").DefValue = "model"
}
// modelName returns the model name from flags, failing the test if not set
func modelName(b *testing.B) string {
if modelFlag == "" {
b.Fatal("Error: -m flag is required for benchmark tests")
}
return modelFlag
}
type TestCase struct {
name string
prompt string
maxTokens int
}
// runGenerateBenchmark contains the common generate and metrics logic
func runGenerateBenchmark(b *testing.B, ctx context.Context, client *api.Client, req *api.GenerateRequest) {
start := time.Now()
var ttft time.Duration
var metrics api.Metrics
err := client.Generate(ctx, req, func(resp api.GenerateResponse) error {
if ttft == 0 && resp.Response != "" {
ttft = time.Since(start)
}
if resp.Done {
metrics = resp.Metrics
}
return nil
})
// Report custom metrics as part of the benchmark results
b.ReportMetric(float64(ttft.Milliseconds()), "ttft_ms")
b.ReportMetric(float64(metrics.LoadDuration.Milliseconds()), "load_ms")
// Token throughput metrics
promptThroughput := float64(metrics.PromptEvalCount) / metrics.PromptEvalDuration.Seconds()
genThroughput := float64(metrics.EvalCount) / metrics.EvalDuration.Seconds()
b.ReportMetric(promptThroughput, "prompt_tok/s")
b.ReportMetric(genThroughput, "gen_tok/s")
// Token counts
b.ReportMetric(float64(metrics.PromptEvalCount), "prompt_tokens")
b.ReportMetric(float64(metrics.EvalCount), "gen_tokens")
if err != nil {
b.Fatal(err)
}
}
// BenchmarkColdStart runs benchmarks with model loading from cold state
func BenchmarkColdStart(b *testing.B) {
client := setup(b)
tests := []TestCase{
{"short_prompt", "Write a long story", 100},
{"medium_prompt", "Write a detailed economic analysis", 500},
{"long_prompt", "Write a comprehensive AI research paper", 1000},
}
m := modelName(b)
for _, tt := range tests {
b.Run(fmt.Sprintf("%s/cold/%s", m, tt.name), func(b *testing.B) {
ctx := b.Context()
// Set number of tokens as our throughput metric
b.SetBytes(int64(tt.maxTokens))
for b.Loop() {
b.StopTimer()
// Ensure model is unloaded before each iteration
unload(client, m, b)
b.StartTimer()
req := &api.GenerateRequest{
Model: m,
Prompt: tt.prompt,
Options: map[string]any{"num_predict": tt.maxTokens, "temperature": 0.1},
}
runGenerateBenchmark(b, ctx, client, req)
}
})
}
}
// BenchmarkWarmStart runs benchmarks with pre-loaded model
func BenchmarkWarmStart(b *testing.B) {
client := setup(b)
tests := []TestCase{
{"short_prompt", "Write a long story", 100},
{"medium_prompt", "Write a detailed economic analysis", 500},
{"long_prompt", "Write a comprehensive AI research paper", 1000},
}
m := modelName(b)
for _, tt := range tests {
b.Run(fmt.Sprintf("%s/warm/%s", m, tt.name), func(b *testing.B) {
ctx := b.Context()
// Pre-warm the model
warmup(client, m, tt.prompt, b)
// Set number of tokens as our throughput metric
b.SetBytes(int64(tt.maxTokens))
for b.Loop() {
req := &api.GenerateRequest{
Model: m,
Prompt: tt.prompt,
Options: map[string]any{"num_predict": tt.maxTokens, "temperature": 0.1},
}
runGenerateBenchmark(b, ctx, client, req)
}
})
}
}
// setup verifies server and model availability
func setup(b *testing.B) *api.Client {
client, err := api.ClientFromEnvironment()
if err != nil {
b.Fatal(err)
}
if _, err := client.Show(b.Context(), &api.ShowRequest{Model: modelName(b)}); err != nil {
b.Fatalf("Model unavailable: %v", err)
}
return client
}
// warmup ensures the model is loaded and warmed up
func warmup(client *api.Client, model string, prompt string, b *testing.B) {
for range 3 {
err := client.Generate(
context.Background(),
&api.GenerateRequest{
Model: model,
Prompt: prompt,
Options: map[string]any{"num_predict": 50, "temperature": 0.1},
},
func(api.GenerateResponse) error { return nil },
)
if err != nil {
b.Logf("Error during model warm-up: %v", err)
}
}
}
// unload forces model unloading using KeepAlive: 0 parameter
func unload(client *api.Client, model string, b *testing.B) {
req := &api.GenerateRequest{
Model: model,
KeepAlive: &api.Duration{Duration: 0},
}
if err := client.Generate(context.Background(), req, func(api.GenerateResponse) error { return nil }); err != nil {
b.Logf("Unload error: %v", err)
}
time.Sleep(1 * time.Second)
}

View File

@@ -5,7 +5,7 @@ import (
"errors"
"os"
"os/exec"
"strings"
"regexp"
"github.com/ollama/ollama/api"
)
@@ -19,11 +19,12 @@ func startApp(ctx context.Context, client *api.Client) error {
if err != nil {
return err
}
if !strings.Contains(link, "Ollama.app") {
r := regexp.MustCompile(`^.*/Ollama\s?\d*.app`)
m := r.FindStringSubmatch(link)
if len(m) != 1 {
return errors.New("could not find ollama app")
}
path := strings.Split(link, "Ollama.app")
if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
if err := exec.Command("/usr/bin/open", "-j", "-a", m[0], "--args", "--fast-startup").Run(); err != nil {
return err
}
return waitForServer(ctx, client)

View File

@@ -45,14 +45,11 @@ func startApp(ctx context.Context, client *api.Client) error {
}
}
}
// log.Printf("XXX attempting to start app %s", appExe)
cmd_path := "c:\\Windows\\system32\\cmd.exe"
cmd := exec.Command(cmd_path, "/c", appExe)
// TODO - these hide flags aren't working - still pops up a command window for some reason
cmd := exec.Command(cmd_path, "/c", appExe, "--hide", "--fast-startup")
cmd.SysProcAttr = &syscall.SysProcAttr{CreationFlags: 0x08000000, HideWindow: true}
// TODO this didn't help either...
cmd.Stdin = strings.NewReader("")
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
@@ -74,7 +71,16 @@ func isProcRunning(procName string) []uint32 {
slog.Debug("failed to check for running installers", "error", err)
return nil
}
pids = pids[:ret]
if ret > uint32(len(pids)) {
pids = make([]uint32, ret+10)
if err := windows.EnumProcesses(pids, &ret); err != nil || ret == 0 {
slog.Debug("failed to check for running installers", "error", err)
return nil
}
}
if ret < uint32(len(pids)) {
pids = pids[:ret]
}
var matches []uint32
for _, pid := range pids {
if pid == 0 {

View File

@@ -65,17 +65,17 @@ func (q *qwen25VLModel) Tensors(ts []Tensor) []*ggml.Tensor {
for _, t := range ts {
if strings.Contains(t.Name(), "patch_embed.proj") {
for t := range splitDim(t, 2,
strings.NewReplacer("patch_embed.proj", "patch_embd_0"),
strings.NewReplacer("patch_embed.proj", "patch_embd_1"),
split{Replacer: strings.NewReplacer("patch_embed.proj", "patch_embd_0")},
split{Replacer: strings.NewReplacer("patch_embed.proj", "patch_embd_1")},
) {
t.Shape = slices.DeleteFunc(t.Shape, func(i uint64) bool { return i == 1 })
out = append(out, t)
}
} else if strings.Contains(t.Name(), "attn.qkv") {
out = append(out, slices.Collect(splitDim(t, 0,
strings.NewReplacer("attn.qkv", "attn_q"),
strings.NewReplacer("attn.qkv", "attn_k"),
strings.NewReplacer("attn.qkv", "attn_v"),
split{Replacer: strings.NewReplacer("attn.qkv", "attn_q")},
split{Replacer: strings.NewReplacer("attn.qkv", "attn_k")},
split{Replacer: strings.NewReplacer("attn.qkv", "attn_v")},
))...)
} else {
out = append(out, &ggml.Tensor{

View File

@@ -1,53 +1,73 @@
package convert
import (
"cmp"
"iter"
"slices"
"strings"
"github.com/ollama/ollama/fs/ggml"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/fs/ggml"
)
type split struct {
*strings.Replacer
dim int
// fn is an optional function to apply to the tensor after slicing
fn func(tensor.Tensor) (tensor.Tensor, error)
}
// splitDim splits a tensor along a specified dimension into multiple tensors. The dimension
// is split evenly based on the number of replacers provided.
func splitDim(t Tensor, dim int, replacers ...*strings.Replacer) iter.Seq[*ggml.Tensor] {
// is split evenly based on the number of replacers provided unless a specific count is given.
func splitDim(t Tensor, dim int, splits ...split) iter.Seq[*ggml.Tensor] {
return func(yield func(*ggml.Tensor) bool) {
for i, replacer := range replacers {
var offset int
for _, split := range splits {
t := t.Clone()
shape := slices.Clone(t.Shape())
shape[dim] = shape[dim] / uint64(len(replacers))
shape[dim] = cmp.Or(uint64(split.dim), shape[dim]/uint64(len(splits)))
slice := slices.Repeat([]tensor.Slice{nil}, len(shape))
slice[dim] = tensor.S(i*int(shape[dim]), (i+1)*int(shape[dim]))
slice[dim] = tensor.S(offset, offset+int(shape[dim]))
offset += int(shape[dim])
tt := t.Clone()
tt.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
t.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
dims := make([]int, len(shape))
for i := range shape {
dims[i] = int(shape[i])
}
var t tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
t, err := t.Slice(slice...)
var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
tt, err := tt.Slice(slice...)
if err != nil {
return nil, err
}
t = tensor.Materialize(t)
tt = tensor.Materialize(tt)
if split.fn != nil {
tt, err = split.fn(tt)
if err != nil {
return nil, err
}
}
// flatten tensor so it can be written as a vector
if err := t.Reshape(t.Shape().TotalSize()); err != nil {
if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
return nil, err
}
return native.VectorF32(t.(*tensor.Dense))
return native.VectorF32(tt.(*tensor.Dense))
})
if !yield(&ggml.Tensor{
Name: replacer.Replace(t.Name()),
Name: split.Replace(t.Name()),
Kind: t.Kind(),
Shape: shape,
WriterTo: tt,
WriterTo: t,
}) {
break
}

304
convert/tensor_test.go Normal file
View File

@@ -0,0 +1,304 @@
package convert
import (
"bytes"
"encoding/binary"
"io"
"iter"
"slices"
"strings"
"testing"
"github.com/pdevine/tensor"
)
type fakeTensor struct {
name string
shape []uint64
data []float32
repacker Repacker
}
func (f fakeTensor) Name() string {
return f.name
}
func (f fakeTensor) Shape() []uint64 {
return f.shape
}
func (f fakeTensor) Kind() uint32 {
return 0
}
func (f *fakeTensor) SetRepacker(fn Repacker) {
f.repacker = fn
}
func (f fakeTensor) Clone() Tensor {
return &fakeTensor{
name: f.name,
shape: slices.Clone(f.shape),
data: slices.Clone(f.data),
repacker: f.repacker,
}
}
func (f fakeTensor) WriteTo(w io.Writer) (n int64, err error) {
data := f.data
if f.repacker != nil {
data, err = f.repacker(f.name, data, f.shape)
if err != nil {
return 0, err
}
}
if err := binary.Write(w, binary.LittleEndian, data); err != nil {
return 0, err
}
return int64(len(data) * 4), nil
}
func mul(shape []uint64) int {
n := 1
for _, dim := range shape {
n *= int(dim)
}
return n
}
func TestSplitDim(t *testing.T) {
r := fakeTensor{
name: "a.b",
shape: []uint64{3, 4},
data: []float32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
}
t.Run("no split", func(t *testing.T) {
for tt := range splitDim(&r, 0, split{Replacer: strings.NewReplacer("a", "x")}) {
if tt.Name != "x.b" {
t.Fatalf("expected name 'x', got '%s'", tt.Name)
}
if !slices.Equal(tt.Shape, []uint64{3, 4}) {
t.Fatalf("expected shape [3, 4], got %v", tt.Shape)
}
var b bytes.Buffer
if _, err := tt.WriteTo(&b); err != nil {
t.Fatal(err)
}
f32s := make([]float32, mul(tt.Shape))
if err := binary.Read(&b, binary.LittleEndian, &f32s); err != nil {
t.Fatal(err)
}
if !slices.Equal(f32s, []float32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) {
t.Fatalf("expected data [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], got %v", f32s)
}
}
})
t.Run("even split", func(t *testing.T) {
next, stop := iter.Pull(splitDim(&r, 1,
split{Replacer: strings.NewReplacer("a", "x")},
split{Replacer: strings.NewReplacer("b", "y")},
))
defer stop()
{
tt, ok := next()
if !ok {
t.Fatal("expected at least one split")
}
if tt.Name != "x.b" {
t.Fatal("expected name 'x.b', got", tt.Name)
}
if !slices.Equal(tt.Shape, []uint64{3, 2}) {
t.Fatal("expected shape [3, 2], got", tt.Shape)
}
var b bytes.Buffer
if _, err := tt.WriteTo(&b); err != nil {
t.Fatal(err)
}
f32s := make([]float32, mul(tt.Shape))
if err := binary.Read(&b, binary.LittleEndian, &f32s); err != nil {
t.Fatal(err)
}
if !slices.Equal(f32s, []float32{0, 1, 4, 5, 8, 9}) {
t.Fatal("expected data [0, 1, 4, 5, 8, 9], got", f32s)
}
}
{
tt, ok := next()
if !ok {
t.Fatal("expected at least one split")
}
if tt.Name != "a.y" {
t.Fatal("expected name 'a.y', got", tt.Name)
}
if !slices.Equal(tt.Shape, []uint64{3, 2}) {
t.Fatal("expected shape [3, 2], got", tt.Shape)
}
var b bytes.Buffer
if _, err := tt.WriteTo(&b); err != nil {
t.Fatal(err)
}
f32s := make([]float32, mul(tt.Shape))
if err := binary.Read(&b, binary.LittleEndian, &f32s); err != nil {
t.Fatal(err)
}
if !slices.Equal(f32s, []float32{2, 3, 6, 7, 10, 11}) {
t.Fatal("expected data [2, 3, 6, 7, 10, 11], got", f32s)
}
}
})
t.Run("uneven split", func(t *testing.T) {
next, stop := iter.Pull(splitDim(&r, 0,
split{Replacer: strings.NewReplacer("a", "x"), dim: 2},
split{Replacer: strings.NewReplacer("b", "y"), dim: 1},
))
defer stop()
{
tt, ok := next()
if !ok {
t.Fatal("expected at least one split")
}
if tt.Name != "x.b" {
t.Fatal("expected name 'x.b', got", tt.Name)
}
if !slices.Equal(tt.Shape, []uint64{2, 4}) {
t.Fatal("expected shape [2, 4], got", tt.Shape)
}
var b bytes.Buffer
if _, err := tt.WriteTo(&b); err != nil {
t.Fatal(err)
}
f32s := make([]float32, mul(tt.Shape))
if err := binary.Read(&b, binary.LittleEndian, &f32s); err != nil {
t.Fatal(err)
}
if !slices.Equal(f32s, []float32{0, 1, 2, 3, 4, 5, 6, 7}) {
t.Fatal("expected data [0, 1, 2, 3, 4, 5, 6, 7], got", f32s)
}
}
{
tt, ok := next()
if !ok {
t.Fatal("expected at least one split")
}
if tt.Name != "a.y" {
t.Fatal("expected name 'a.y', got", tt.Name)
}
if !slices.Equal(tt.Shape, []uint64{1, 4}) {
t.Fatal("expected shape [1, 4], got", tt.Shape)
}
var b bytes.Buffer
if _, err := tt.WriteTo(&b); err != nil {
t.Fatal(err)
}
f32s := make([]float32, mul(tt.Shape))
if err := binary.Read(&b, binary.LittleEndian, &f32s); err != nil {
t.Fatal(err)
}
if !slices.Equal(f32s, []float32{8, 9, 10, 11}) {
t.Fatal("expected data [8, 9, 10, 11], got", f32s)
}
}
})
t.Run("split with transpose", func(t *testing.T) {
next, stop := iter.Pull(splitDim(&r, 1,
split{Replacer: strings.NewReplacer("a", "x")},
split{Replacer: strings.NewReplacer("b", "y"), fn: func(tt tensor.Tensor) (tensor.Tensor, error) {
return tensor.Transpose(tt, 1, 0)
}},
))
defer stop()
{
tt, ok := next()
if !ok {
t.Fatal("expected at least one split")
}
if tt.Name != "x.b" {
t.Fatal("expected name 'x.b', got", tt.Name)
}
if !slices.Equal(tt.Shape, []uint64{3, 2}) {
t.Fatal("expected shape [3, 2], got", tt.Shape)
}
var b bytes.Buffer
if _, err := tt.WriteTo(&b); err != nil {
t.Fatal(err)
}
f32s := make([]float32, mul(tt.Shape))
if err := binary.Read(&b, binary.LittleEndian, &f32s); err != nil {
t.Fatal(err)
}
if !slices.Equal(f32s, []float32{0, 1, 4, 5, 8, 9}) {
t.Fatal("expected data [0, 1, 4, 5, 8, 9], got", f32s)
}
}
{
tt, ok := next()
if !ok {
t.Fatal("expected at least one split")
}
if tt.Name != "a.y" {
t.Fatal("expected name 'a.y', got", tt.Name)
}
if !slices.Equal(tt.Shape, []uint64{3, 2}) {
t.Fatal("expected shape [3, 2], got", tt.Shape)
}
var b bytes.Buffer
if _, err := tt.WriteTo(&b); err != nil {
t.Fatal(err)
}
f32s := make([]float32, mul(tt.Shape))
if err := binary.Read(&b, binary.LittleEndian, &f32s); err != nil {
t.Fatal(err)
}
if !slices.Equal(f32s, []float32{2, 6, 10, 3, 7, 11}) {
t.Fatal("expected data [2, 6, 10, 3, 7, 11], got", f32s)
}
}
})
}

View File

@@ -1,59 +0,0 @@
# Benchmark
Go benchmark tests that measure end-to-end performance of a running Ollama server. Run these tests to evaluate model inference performance on your hardware and measure the impact of code changes.
## When to use
Run these benchmarks when:
- Making changes to the model inference engine
- Modifying model loading/unloading logic
- Changing prompt processing or token generation code
- Implementing a new model architecture
- Testing performance across different hardware setups
## Prerequisites
- Ollama server running locally with `ollama serve` on `127.0.0.1:11434`
## Usage and Examples
>[!NOTE]
>All commands must be run from the root directory of the Ollama project.
Basic syntax:
```bash
go test -bench=. ./benchmark/... -m $MODEL_NAME
```
Required flags:
- `-bench=.`: Run all benchmarks
- `-m`: Model name to benchmark
Optional flags:
- `-count N`: Number of times to run the benchmark (useful for statistical analysis)
- `-timeout T`: Maximum time for the benchmark to run (e.g. "10m" for 10 minutes)
Common usage patterns:
Single benchmark run with a model specified:
```bash
go test -bench=. ./benchmark/... -m llama3.3
```
## Output metrics
The benchmark reports several key metrics:
- `gen_tok/s`: Generated tokens per second
- `prompt_tok/s`: Prompt processing tokens per second
- `ttft_ms`: Time to first token in milliseconds
- `load_ms`: Model load time in milliseconds
- `gen_tokens`: Total tokens generated
- `prompt_tokens`: Total prompt tokens processed
Each benchmark runs two scenarios:
- Cold start: Model is loaded from disk for each test
- Warm start: Model is pre-loaded in memory
Three prompt lengths are tested for each scenario:
- Short prompt (100 tokens)
- Medium prompt (500 tokens)
- Long prompt (1000 tokens)

View File

@@ -118,7 +118,7 @@ To run tests, use `go test`:
go test ./...
```
> NOTE: In rare cirumstances, you may nedd to change a package using the new
> NOTE: In rare cirumstances, you may need to change a package using the new
> "synctest" package in go1.24.
>
> If you do not have the "synctest" package enabled, you will not see build or

View File

@@ -112,8 +112,8 @@ sudo systemctl status ollama
> While AMD has contributed the `amdgpu` driver upstream to the official linux
> kernel source, the version is older and may not support all ROCm features. We
> recommend you install the latest driver from
> https://www.amd.com/en/support/linux-drivers for best support of your Radeon
> GPU.
> [AMD](https://www.amd.com/en/support/download/linux-drivers.html) for best support
> of your Radeon GPU.
## Customizing

View File

@@ -527,23 +527,17 @@ func WriteGGUF(f *os.File, kv KV, ts []*Tensor) error {
return err
}
keys := slices.Collect(maps.Keys(kv))
slices.Sort(keys)
for _, key := range keys {
for _, key := range slices.Sorted(maps.Keys(kv)) {
if err := ggufWriteKV(f, key, kv[key]); err != nil {
return err
}
}
slices.SortStableFunc(ts, func(a, b *Tensor) int {
if i, j := a.block(), b.block(); i < 0 && j > 0 {
return 1
} else if i > 0 && j < 0 {
return -1
} else {
if i, j := a.block(), b.block(); i > 0 && j > 0 {
return cmp.Compare(i, j)
}
return cmp.Compare(a.Name, b.Name)
})
var s uint64

View File

@@ -2,62 +2,82 @@ package ggml
import (
"bytes"
"math/rand/v2"
"os"
"slices"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
)
func TestWriteGGUF(t *testing.T) {
w, err := os.CreateTemp(t.TempDir(), "*.bin")
if err != nil {
t.Fatal(err)
}
defer w.Close()
r := rand.New(rand.NewPCG(0, 0))
for range 8 {
t.Run("shuffle", func(t *testing.T) {
t.Parallel()
if err := WriteGGUF(w, KV{
"general.alignment": uint32(16),
}, []*Tensor{
{Name: "test.0", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
{Name: "test.1", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
{Name: "test.2", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
{Name: "test.3", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
{Name: "test.4", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
{Name: "test.5", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
}); err != nil {
t.Fatal(err)
}
ts := []*Tensor{
{Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
{Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
{Name: "blk.1.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
{Name: "blk.2.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
{Name: "blk.3.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
{Name: "blk.4.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
{Name: "blk.5.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
{Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewBuffer(make([]byte, 3*2))},
{Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewBuffer(make([]byte, 3*2))},
}
r, err := os.Open(w.Name())
if err != nil {
t.Fatal(err)
}
defer r.Close()
r.Shuffle(len(ts), func(i, j int) {
ts[i], ts[j] = ts[j], ts[i]
})
ff, err := Decode(r, 0)
if err != nil {
t.Fatal(err)
}
w, err := os.CreateTemp(t.TempDir(), strings.ReplaceAll(t.Name(), "/", "_")+"*.bin")
if err != nil {
t.Fatal(err)
}
defer w.Close()
if diff := cmp.Diff(ff.KV(), KV{
"general.alignment": uint32(16),
"general.parameter_count": uint64(36),
}); diff != "" {
t.Errorf("Mismatch (-want +got):\n%s", diff)
}
if err := WriteGGUF(w, KV{
"general.alignment": uint32(16),
}, ts); err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(ff.Tensors(), Tensors{
Offset: 336,
items: []*Tensor{
{Name: "test.0", Offset: 0, Shape: []uint64{2, 3}},
{Name: "test.1", Offset: 32, Shape: []uint64{2, 3}},
{Name: "test.2", Offset: 64, Shape: []uint64{2, 3}},
{Name: "test.3", Offset: 96, Shape: []uint64{2, 3}},
{Name: "test.4", Offset: 128, Shape: []uint64{2, 3}},
{Name: "test.5", Offset: 160, Shape: []uint64{2, 3}},
},
}, cmp.AllowUnexported(Tensors{})); diff != "" {
t.Errorf("Mismatch (-want +got):\n%s", diff)
r, err := os.Open(w.Name())
if err != nil {
t.Fatal(err)
}
defer r.Close()
ff, err := Decode(r, 0)
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(KV{
"general.alignment": uint32(16),
"general.parameter_count": uint64(54),
}, ff.KV()); diff != "" {
t.Errorf("Mismatch (-want +got):\n%s", diff)
}
if diff := cmp.Diff(Tensors{
Offset: 608,
items: []*Tensor{
{Name: "blk.0.attn_norm.weight", Offset: 0, Shape: []uint64{2, 3}},
{Name: "blk.1.attn_norm.weight", Offset: 32, Shape: []uint64{2, 3}},
{Name: "blk.2.attn_norm.weight", Offset: 64, Shape: []uint64{2, 3}},
{Name: "blk.3.attn_norm.weight", Offset: 96, Shape: []uint64{2, 3}},
{Name: "blk.4.attn_norm.weight", Offset: 128, Shape: []uint64{2, 3}},
{Name: "blk.5.attn_norm.weight", Offset: 160, Shape: []uint64{2, 3}},
{Name: "output.weight", Offset: 192, Shape: []uint64{3, 2}},
{Name: "output_norm.weight", Offset: 224, Shape: []uint64{3, 2}},
{Name: "token_embd.weight", Offset: 256, Shape: []uint64{2, 3}},
},
}, ff.Tensors(), cmp.AllowUnexported(Tensors{})); diff != "" {
t.Errorf("Mismatch (-want +got):\n%s", diff)
}
})
}
}

View File

@@ -0,0 +1,102 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@ollama.com>
Date: Thu, 24 Apr 2025 14:48:51 -0700
Subject: [PATCH] ggml: Export GPU UUIDs
This enables matching up devices and information reported by the backend
with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
---
ggml/include/ggml-backend.h | 1 +
ggml/src/ggml-cuda/ggml-cuda.cu | 33 ++++++++++++++++++++++++++++++++
ggml/src/ggml-metal/ggml-metal.m | 1 +
3 files changed, 35 insertions(+)
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 74e46716..a880df33 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -152,6 +152,7 @@ extern "C" {
struct ggml_backend_dev_props {
const char * name;
const char * description;
+ const char * uuid;
size_t memory_free;
size_t memory_total;
enum ggml_backend_dev_type type;
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index cb0d8528..4c829153 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
int device;
std::string name;
std::string description;
+ std::string uuid;
};
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
return ctx->description.c_str();
}
+static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
+ ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
+ return ctx->uuid.c_str();
+}
+
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
ggml_cuda_set_device(ctx->device);
@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
props->name = ggml_backend_cuda_device_get_name(dev);
props->description = ggml_backend_cuda_device_get_description(dev);
+ props->uuid = ggml_backend_cuda_device_get_uuid(dev);
props->type = ggml_backend_cuda_device_get_type(dev);
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
dev_ctx->description = prop.name;
+ #if !defined(GGML_USE_HIP)
+ char uuid[64];
+ snprintf(uuid, sizeof(uuid),
+ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ (unsigned char)prop.uuid.bytes[0],
+ (unsigned char)prop.uuid.bytes[1],
+ (unsigned char)prop.uuid.bytes[2],
+ (unsigned char)prop.uuid.bytes[3],
+ (unsigned char)prop.uuid.bytes[4],
+ (unsigned char)prop.uuid.bytes[5],
+ (unsigned char)prop.uuid.bytes[6],
+ (unsigned char)prop.uuid.bytes[7],
+ (unsigned char)prop.uuid.bytes[8],
+ (unsigned char)prop.uuid.bytes[9],
+ (unsigned char)prop.uuid.bytes[10],
+ (unsigned char)prop.uuid.bytes[11],
+ (unsigned char)prop.uuid.bytes[12],
+ (unsigned char)prop.uuid.bytes[13],
+ (unsigned char)prop.uuid.bytes[14],
+ (unsigned char)prop.uuid.bytes[15]
+ );
+ dev_ctx->uuid = uuid;
+ #else
+ dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
+ #endif
+
ggml_backend_dev_t dev = new ggml_backend_device {
/* .iface = */ ggml_backend_cuda_device_interface,
/* .reg = */ &reg,
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
index 1b56f858..ee4f2dcb 100644
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
props->name = ggml_backend_metal_device_get_name(dev);
props->description = ggml_backend_metal_device_get_description(dev);
+ props->uuid = "0";
props->type = ggml_backend_metal_device_get_type(dev);
ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = (struct ggml_backend_dev_caps) {

View File

@@ -797,7 +797,8 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
res, err := http.DefaultClient.Do(serverReq)
if err != nil {
return fmt.Errorf("POST predict: %v", err)
slog.Error("post predict", "error", err)
return errors.New("model runner has unexpectedly stopped, this may be due to resource limitations or an internal error, check ollama server logs for details")
}
defer res.Body.Close()

View File

@@ -124,6 +124,10 @@ type DeviceMemory struct {
// may not be persistent across instances of the runner.
Name string
// UUID is a unique persistent identifier for the device for matching
// with system management libraries
UUID string
// Weights is the per-layer memory needed for the model weights.
Weights []Memory
@@ -152,6 +156,10 @@ func (m DeviceMemory) LogValue() slog.Value {
attrs = append(attrs, slog.Any("Graph", m.Graph))
}
if len(attrs) > 0 && m.UUID != "" {
attrs = append([]slog.Attr{slog.String("UUID", m.UUID)}, attrs...)
}
return slog.GroupValue(attrs...)
}

View File

@@ -136,6 +136,9 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
}
requiredMemory.CPU.Name = C.GoString(C.ggml_backend_dev_name(cpuDeviceBufferType.d))
var props C.struct_ggml_backend_dev_props
C.ggml_backend_dev_get_props(cpuDeviceBufferType.d, &props)
requiredMemory.CPU.UUID = C.GoString(props.uuid)
requiredMemory.CPU.Weights = make([]ml.Memory, blocks+1)
requiredMemory.CPU.Cache = make([]ml.Memory, blocks+1)
@@ -150,6 +153,9 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
})
btDeviceMemory[bt] = &requiredMemory.GPUs[i]
requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d))
var props C.struct_ggml_backend_dev_props
C.ggml_backend_dev_get_props(d, &props)
requiredMemory.GPUs[i].UUID = C.GoString(props.uuid)
requiredMemory.GPUs[i].Weights = make([]ml.Memory, blocks+1)
requiredMemory.GPUs[i].Cache = make([]ml.Memory, blocks+1)
}

View File

@@ -152,6 +152,7 @@ extern "C" {
struct ggml_backend_dev_props {
const char * name;
const char * description;
const char * uuid;
size_t memory_free;
size_t memory_total;
enum ggml_backend_dev_type type;

View File

@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
int device;
std::string name;
std::string description;
std::string uuid;
};
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
return ctx->description.c_str();
}
static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
return ctx->uuid.c_str();
}
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
ggml_cuda_set_device(ctx->device);
@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
props->name = ggml_backend_cuda_device_get_name(dev);
props->description = ggml_backend_cuda_device_get_description(dev);
props->uuid = ggml_backend_cuda_device_get_uuid(dev);
props->type = ggml_backend_cuda_device_get_type(dev);
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
dev_ctx->description = prop.name;
#if !defined(GGML_USE_HIP)
char uuid[64];
snprintf(uuid, sizeof(uuid),
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
(unsigned char)prop.uuid.bytes[0],
(unsigned char)prop.uuid.bytes[1],
(unsigned char)prop.uuid.bytes[2],
(unsigned char)prop.uuid.bytes[3],
(unsigned char)prop.uuid.bytes[4],
(unsigned char)prop.uuid.bytes[5],
(unsigned char)prop.uuid.bytes[6],
(unsigned char)prop.uuid.bytes[7],
(unsigned char)prop.uuid.bytes[8],
(unsigned char)prop.uuid.bytes[9],
(unsigned char)prop.uuid.bytes[10],
(unsigned char)prop.uuid.bytes[11],
(unsigned char)prop.uuid.bytes[12],
(unsigned char)prop.uuid.bytes[13],
(unsigned char)prop.uuid.bytes[14],
(unsigned char)prop.uuid.bytes[15]
);
dev_ctx->uuid = uuid;
#else
dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
#endif
ggml_backend_dev_t dev = new ggml_backend_device {
/* .iface = */ ggml_backend_cuda_device_interface,
/* .reg = */ &reg,

View File

@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
props->name = ggml_backend_metal_device_get_name(dev);
props->description = ggml_backend_metal_device_get_description(dev);
props->uuid = "0";
props->type = ggml_backend_metal_device_get_type(dev);
ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = (struct ggml_backend_dev_caps) {

View File

@@ -63,9 +63,9 @@ func (mlp *TextMLP) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *TextOp
}
type TextExperts struct {
Gate ml.Tensor `gguf:"ffn_gate_exps.weight"`
Up ml.Tensor `gguf:"ffn_up_exps.weight"`
Down ml.Tensor `gguf:"ffn_down_exps.weight"`
Gate *nn.Linear `gguf:"ffn_gate_exps"`
Up *nn.Linear `gguf:"ffn_up_exps"`
Down *nn.Linear `gguf:"ffn_down_exps"`
}
func (e *TextExperts) Forward(ctx ml.Context, hiddenStates, routerLogits ml.Tensor, opts *TextOptions) ml.Tensor {
@@ -76,9 +76,9 @@ func (e *TextExperts) Forward(ctx ml.Context, hiddenStates, routerLogits ml.Tens
hiddenStates = hiddenStates.Repeat(ctx, 1, opts.numExpertsUsed)
hiddenStates = hiddenStates.Mul(ctx, scores)
upStates := e.Up.MulmatID(ctx, hiddenStates, experts)
gateStates := e.Gate.MulmatID(ctx, hiddenStates, experts)
downStates := e.Down.MulmatID(ctx, upStates.Mul(ctx, gateStates.SILU(ctx)), experts)
upStates := e.Up.Weight.MulmatID(ctx, hiddenStates, experts)
gateStates := e.Gate.Weight.MulmatID(ctx, hiddenStates, experts)
downStates := e.Down.Weight.MulmatID(ctx, upStates.Mul(ctx, gateStates.SILU(ctx)), experts)
nextStates := downStates.View(ctx, 0, hiddenStates.Dim(0), downStates.Stride(2), hiddenStates.Dim(2))
for i := 1; i < opts.numExpertsUsed; i++ {

View File

@@ -66,9 +66,9 @@ type MLP interface {
type sparse struct {
Router *nn.Linear `gguf:"ffn_gate_inp"`
Gate ml.Tensor `gguf:"ffn_gate_exps.weight"`
Up ml.Tensor `gguf:"ffn_up_exps.weight"`
Down ml.Tensor `gguf:"ffn_down_exps.weight"`
Gate *nn.Linear `gguf:"ffn_gate_exps"`
Up *nn.Linear `gguf:"ffn_up_exps"`
Down *nn.Linear `gguf:"ffn_down_exps"`
}
func (mlp *sparse) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options) ml.Tensor {
@@ -87,13 +87,13 @@ func (mlp *sparse) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options
hiddenStates = hiddenStates.Reshape(ctx, hiddenStates.Dim(0), 1, hiddenStates.Dim(1))
upStates := mlp.Up.MulmatID(ctx, hiddenStates, selectedExperts)
upStates := mlp.Up.Weight.MulmatID(ctx, hiddenStates, selectedExperts)
hiddenStates = mlp.Gate.MulmatID(ctx, hiddenStates, selectedExperts)
hiddenStates = mlp.Gate.Weight.MulmatID(ctx, hiddenStates, selectedExperts)
hiddenStates = hiddenStates.SILU(ctx)
hiddenStates = hiddenStates.Mul(ctx, upStates)
experts := mlp.Down.MulmatID(ctx, hiddenStates, selectedExperts)
experts := mlp.Down.Weight.MulmatID(ctx, hiddenStates, selectedExperts)
experts = experts.Mul(ctx, routingWeights)
nextStates := experts.View(ctx, 0, experts.Dim(0), experts.Stride(2), experts.Dim(2))

View File

@@ -87,7 +87,7 @@ func (v *Vocabulary) Decode(id int32) string {
func (v *Vocabulary) SpecialVocabulary() []string {
v.specialOnce.Do(func() {
for i := range v.Values {
if v.Types[i] == TOKEN_TYPE_CONTROL {
if v.Types[i] == TOKEN_TYPE_CONTROL || v.Types[i] == TOKEN_TYPE_USER_DEFINED {
v.special = append(v.special, v.Values[i])
}
}

16
model/vocabulary_test.go Normal file
View File

@@ -0,0 +1,16 @@
package model
import "testing"
func TestVocabulary_SpecialVocabulary(t *testing.T) {
vocab := &Vocabulary{
Values: []string{"<|startoftext|>", "<|endoftext|>", "<|tool_call_start|>", "<|tool_call_end|>", "hi"},
Types: []int32{TOKEN_TYPE_CONTROL, TOKEN_TYPE_CONTROL, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_NORMAL},
}
specialVocab := vocab.SpecialVocabulary()
if len(specialVocab) != 4 {
t.Errorf("expected 4 special tokens, got %d", len(specialVocab))
}
}

View File

@@ -292,13 +292,18 @@ func filesForModel(path string) ([]string, error) {
}
files = append(files, js...)
if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
// tokenizer.model might be a unresolved git lfs reference; error if it is
files = append(files, tks...)
} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
files = append(files, tks...)
// only include tokenizer.model is tokenizer.json is not present
if !slices.ContainsFunc(files, func(s string) bool {
return slices.Contains(strings.Split(s, string(os.PathSeparator)), "tokenizer.json")
}) {
if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
// tokenizer.model might be a unresolved git lfs reference; error if it is
files = append(files, tks...)
} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
files = append(files, tks...)
}
}
return files, nil

View File

@@ -26,6 +26,7 @@ import (
"github.com/ollama/ollama/fs/ggml"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/thinking"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)
@@ -113,7 +114,7 @@ func (m *Model) Capabilities() []model.Capability {
}
// Check for thinking capability
openingTag, closingTag := inferThinkingTags(m.Template.Template)
openingTag, closingTag := thinking.InferTags(m.Template.Template)
if openingTag != "" && closingTag != "" {
capabilities = append(capabilities, model.CapabilityThinking)
}

View File

@@ -59,7 +59,7 @@ type DiskCache struct {
testHookBeforeFinalWrite func(f *os.File)
}
// PutString is a convenience function for c.Put(d, strings.NewReader(s), int64(len(s))).
// PutBytes is a convenience function for c.Put(d, strings.NewReader(s), int64(len(s))).
func PutBytes[S string | []byte](c *DiskCache, d Digest, data S) error {
return c.Put(d, bytes.NewReader([]byte(data)), int64(len(data)))
}

View File

@@ -37,6 +37,7 @@ import (
"github.com/ollama/ollama/server/internal/client/ollama"
"github.com/ollama/ollama/server/internal/registry"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/thinking"
"github.com/ollama/ollama/tools"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
@@ -282,12 +283,12 @@ func (s *Server) GenerateHandler(c *gin.Context) {
prompt = b.String()
}
var thinkingState *thinkingParser
openingTag, closingTag := inferThinkingTags(m.Template.Template)
var thinkingState *thinking.Parser
openingTag, closingTag := thinking.InferTags(m.Template.Template)
if req.Think != nil && *req.Think && openingTag != "" && closingTag != "" {
thinkingState = &thinkingParser{
openingTag: openingTag,
closingTag: closingTag,
thinkingState = &thinking.Parser{
OpeningTag: openingTag,
ClosingTag: closingTag,
}
}
@@ -316,7 +317,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
}
if thinkingState != nil {
thinking, content := thinkingState.addContent(cr.Content)
thinking, content := thinkingState.AddContent(cr.Content)
res.Thinking = thinking
res.Response = content
}
@@ -1514,23 +1515,18 @@ func (s *Server) ChatHandler(c *gin.Context) {
return
}
var thinkingState *thinkingParser
openingTag, closingTag := inferThinkingTags(m.Template.Template)
var thinkingState *thinking.Parser
openingTag, closingTag := thinking.InferTags(m.Template.Template)
if req.Think != nil && *req.Think && openingTag != "" && closingTag != "" {
thinkingState = &thinkingParser{
openingTag: openingTag,
closingTag: closingTag,
thinkingState = &thinking.Parser{
OpeningTag: openingTag,
ClosingTag: closingTag,
}
}
var toolParser *tools.Parser
if len(req.Tools) > 0 {
toolParser, err = tools.NewParser(m.Template.Template)
if err != nil {
slog.Error("failed to create tool parser", "error", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
toolParser = tools.NewParser(m.Template.Template, req.Tools)
}
ch := make(chan any)
@@ -1557,7 +1553,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
}
if thinkingState != nil {
thinkingContent, remainingContent := thinkingState.addContent(res.Message.Content)
thinkingContent, remainingContent := thinkingState.AddContent(res.Message.Content)
if thinkingContent == "" && remainingContent == "" && !r.Done {
// need to accumulate more to decide what to send
return
@@ -1583,6 +1579,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
// don't return
} else {
if r.Done {
res.Message.Content = toolParser.Content()
ch <- res
}
return
@@ -1668,11 +1665,11 @@ func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
// change the user output), we should probably perform this filtering
// for all thinking models (not just qwen3 & deepseek-r1) since it tends
// to save tokens and improve quality.
thinkingState := &thinkingParser{
openingTag: "<think>",
closingTag: "</think>",
thinkingState := &thinking.Parser{
OpeningTag: "<think>",
ClosingTag: "</think>",
}
_, content := thinkingState.addContent(msg.Content)
_, content := thinkingState.AddContent(msg.Content)
msgs[i].Content = content
}
}

View File

@@ -1,9 +1,7 @@
package server
package thinking
import (
"strings"
"text/template"
"text/template/parse"
"unicode"
)
@@ -46,17 +44,17 @@ func (s thinkingState) String() string {
}
}
type thinkingParser struct {
type Parser struct {
state thinkingState
openingTag string
closingTag string
OpeningTag string
ClosingTag string
acc strings.Builder
}
// addContent returns the thinking content and the non-thinking content that
// AddContent returns the thinking content and the non-thinking content that
// should be immediately sent to the user. It will internally buffer if it needs
// to see more raw content to disambiguate
func (s *thinkingParser) addContent(content string) (string, string) {
func (s *Parser) AddContent(content string) (string, string) {
s.acc.WriteString(content)
var thinkingSb, remainingSb strings.Builder
@@ -76,12 +74,12 @@ func (s *thinkingParser) addContent(content string) (string, string) {
}
// the additional bool return is true iff we should continue eating
func eat(s *thinkingParser) (string, string, bool) {
func eat(s *Parser) (string, string, bool) {
switch s.state {
case thinkingState_LookingForOpening:
trimmed := strings.TrimLeftFunc(s.acc.String(), unicode.IsSpace)
if strings.HasPrefix(trimmed, s.openingTag) {
after := strings.Join(strings.Split(trimmed, s.openingTag)[1:], s.openingTag)
if strings.HasPrefix(trimmed, s.OpeningTag) {
after := strings.Join(strings.Split(trimmed, s.OpeningTag)[1:], s.OpeningTag)
after = strings.TrimLeftFunc(after, unicode.IsSpace)
// after might contain more than just thinking tokens, so we continue
// parsing instead of returning it as thinking tokens here
@@ -93,7 +91,7 @@ func eat(s *thinkingParser) (string, string, bool) {
s.state = thinkingState_Thinking
}
return "", "", true
} else if strings.HasPrefix(s.openingTag, trimmed) {
} else if strings.HasPrefix(s.OpeningTag, trimmed) {
// partial opening seen, so let's keep accumulating
return "", "", false
} else if trimmed == "" {
@@ -119,10 +117,10 @@ func eat(s *thinkingParser) (string, string, bool) {
}
case thinkingState_Thinking:
acc := s.acc.String()
if strings.Contains(acc, s.closingTag) {
split := strings.Split(acc, s.closingTag)
if strings.Contains(acc, s.ClosingTag) {
split := strings.Split(acc, s.ClosingTag)
thinking := split[0]
remaining := strings.Join(split[1:], s.closingTag)
remaining := strings.Join(split[1:], s.ClosingTag)
remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
s.acc.Reset()
if remaining == "" {
@@ -131,7 +129,7 @@ func eat(s *thinkingParser) (string, string, bool) {
s.state = thinkingState_ThinkingDone
}
return thinking, remaining, false
} else if overlapLen := overlap(acc, s.closingTag); overlapLen > 0 {
} else if overlapLen := overlap(acc, s.ClosingTag); overlapLen > 0 {
thinking := acc[:len(acc)-overlapLen]
remaining := acc[len(acc)-overlapLen:]
s.acc.Reset()
@@ -171,130 +169,3 @@ func overlap(s, delim string) int {
}
return 0
}
func templateVisit(n parse.Node, enterFn func(parse.Node) bool, exitFn func(parse.Node)) {
if n == nil {
return
}
shouldContinue := enterFn(n)
if !shouldContinue {
return
}
switch x := n.(type) {
case *parse.ListNode:
for _, c := range x.Nodes {
templateVisit(c, enterFn, exitFn)
}
case *parse.BranchNode:
if x.Pipe != nil {
templateVisit(x.Pipe, enterFn, exitFn)
}
if x.List != nil {
templateVisit(x.List, enterFn, exitFn)
}
if x.ElseList != nil {
templateVisit(x.ElseList, enterFn, exitFn)
}
case *parse.ActionNode:
templateVisit(x.Pipe, enterFn, exitFn)
case *parse.WithNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.RangeNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.IfNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.TemplateNode:
templateVisit(x.Pipe, enterFn, exitFn)
case *parse.PipeNode:
for _, c := range x.Cmds {
templateVisit(c, enterFn, exitFn)
}
case *parse.CommandNode:
for _, a := range x.Args {
templateVisit(a, enterFn, exitFn)
}
// text, field, number, etc. are leaves nothing to recurse into
}
if exitFn != nil {
exitFn(n)
}
}
// We use a heuristic to infer the tags that surround thinking traces:
// We look for a range node that iterates over "Messages" and then look for a
// reference to "Thinking" like `{{.Thinking}}`. We then go up to the nearest
// ListNode and take the first and last TextNodes as the opening and closing
// tags.
func inferThinkingTags(t *template.Template) (string, string) {
ancestors := []parse.Node{}
openingTag := ""
closingTag := ""
enterFn := func(n parse.Node) bool {
ancestors = append(ancestors, n)
switch x := n.(type) {
case *parse.FieldNode:
if len(x.Ident) > 0 && x.Ident[0] == "Thinking" {
var mostRecentRange *parse.RangeNode
for i := len(ancestors) - 1; i >= 0; i-- {
if r, ok := ancestors[i].(*parse.RangeNode); ok {
mostRecentRange = r
break
}
}
if mostRecentRange == nil || !rangeUsesField(mostRecentRange, "Messages") {
return true
}
// TODO(drifkin): to be more robust, check that it's in the action
// part, not the `if`'s pipeline part. We do match on the nearest list
// that starts and ends with text nodes, which makes this not strictly
// necessary for our heuristic
// go up to the nearest ancestor that is a *parse.ListNode
for i := len(ancestors) - 1; i >= 0; i-- {
if l, ok := ancestors[i].(*parse.ListNode); ok {
firstNode := l.Nodes[0]
if t, ok := firstNode.(*parse.TextNode); ok {
openingTag = strings.TrimSpace(t.String())
}
lastNode := l.Nodes[len(l.Nodes)-1]
if t, ok := lastNode.(*parse.TextNode); ok {
closingTag = strings.TrimSpace(t.String())
}
break
}
}
}
}
return true
}
exitFn := func(n parse.Node) {
ancestors = ancestors[:len(ancestors)-1]
}
templateVisit(t.Root, enterFn, exitFn)
return openingTag, closingTag
}
// checks to see if the given field name is present in the pipeline of the given range node
func rangeUsesField(rangeNode *parse.RangeNode, field string) bool {
found := false
enterFn := func(n parse.Node) bool {
switch x := n.(type) {
case *parse.FieldNode:
if x.Ident[0] == field {
found = true
}
}
return true
}
templateVisit(rangeNode.BranchNode.Pipe, enterFn, nil)
return found
}

View File

@@ -1,8 +1,7 @@
package server
package thinking
import (
"testing"
"text/template"
)
func TestExtractThinking(t *testing.T) {
@@ -26,11 +25,11 @@ func TestExtractThinking(t *testing.T) {
},
}
for i, tt := range tests {
parser := thinkingParser{
openingTag: "<think>",
closingTag: "</think>",
parser := Parser{
OpeningTag: "<think>",
ClosingTag: "</think>",
}
gotThinking, gotContent := parser.addContent(tt.in)
gotThinking, gotContent := parser.AddContent(tt.in)
if gotContent != tt.wantContent || gotThinking != tt.wantThink {
t.Errorf("case %d: got (%q,%q), want (%q,%q)", i, gotThinking, gotContent, tt.wantThink, tt.wantContent)
}
@@ -259,15 +258,15 @@ func TestThinkingStreaming(t *testing.T) {
}
for _, c := range cases {
parser := thinkingParser{
openingTag: "<think>",
closingTag: "</think>",
parser := Parser{
OpeningTag: "<think>",
ClosingTag: "</think>",
}
if c.skip {
continue
}
for i, step := range c.steps {
thinking, content := parser.addContent(step.input)
thinking, content := parser.AddContent(step.input)
if content != step.wantContent || thinking != step.wantThinking {
t.Errorf("case %q (step %d): got (%q,%q), want (%q,%q)", c.desc, i, content, thinking, step.wantContent, step.wantThinking)
}
@@ -277,127 +276,3 @@ func TestThinkingStreaming(t *testing.T) {
}
}
}
func TestInferThinkingTags(t *testing.T) {
cases := []struct {
desc string
tmplString string
wantOpeningTag string
wantClosingTag string
}{
{
desc: "basic",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ end }}
`,
wantOpeningTag: "<think>",
wantClosingTag: "</think>",
},
{
desc: "doubly nested range",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- range $j, $_ := .NotMessages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ end }}
{{ end }}
`,
wantOpeningTag: "",
wantClosingTag: "",
},
{
desc: "whitespace is trimmed",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
Some text before {{ .Thinking }} Some text after
{{ end }}
{{ end }}
`,
wantOpeningTag: "Some text before",
wantClosingTag: "Some text after",
},
{
desc: "qwen3",
tmplString: `
{{- if or .System .Tools .Thinking }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}
{{- if .Thinking }}
/think
{{- else }}
/no_think
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
`,
wantOpeningTag: "<think>",
wantClosingTag: "</think>",
},
}
for _, c := range cases {
tmpl := template.Must(template.New("test").Parse(c.tmplString))
openingTag, closingTag := inferThinkingTags(tmpl)
if openingTag != c.wantOpeningTag || closingTag != c.wantClosingTag {
t.Errorf("case %q: got (%q,%q), want (%q,%q)", c.desc, openingTag, closingTag, c.wantOpeningTag, c.wantClosingTag)
}
}
}

134
thinking/template.go Normal file
View File

@@ -0,0 +1,134 @@
package thinking
import (
"strings"
"text/template"
"text/template/parse"
)
func templateVisit(n parse.Node, enterFn func(parse.Node) bool, exitFn func(parse.Node)) {
if n == nil {
return
}
shouldContinue := enterFn(n)
if !shouldContinue {
return
}
switch x := n.(type) {
case *parse.ListNode:
for _, c := range x.Nodes {
templateVisit(c, enterFn, exitFn)
}
case *parse.BranchNode:
if x.Pipe != nil {
templateVisit(x.Pipe, enterFn, exitFn)
}
if x.List != nil {
templateVisit(x.List, enterFn, exitFn)
}
if x.ElseList != nil {
templateVisit(x.ElseList, enterFn, exitFn)
}
case *parse.ActionNode:
templateVisit(x.Pipe, enterFn, exitFn)
case *parse.WithNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.RangeNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.IfNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.TemplateNode:
templateVisit(x.Pipe, enterFn, exitFn)
case *parse.PipeNode:
for _, c := range x.Cmds {
templateVisit(c, enterFn, exitFn)
}
case *parse.CommandNode:
for _, a := range x.Args {
templateVisit(a, enterFn, exitFn)
}
// text, field, number, etc. are leaves nothing to recurse into
}
if exitFn != nil {
exitFn(n)
}
}
// InferTags uses a heuristic to infer the tags that surround thinking traces:
// We look for a range node that iterates over "Messages" and then look for a
// reference to "Thinking" like `{{.Thinking}}`. We then go up to the nearest
// ListNode and take the first and last TextNodes as the opening and closing
// tags.
func InferTags(t *template.Template) (string, string) {
ancestors := []parse.Node{}
openingTag := ""
closingTag := ""
enterFn := func(n parse.Node) bool {
ancestors = append(ancestors, n)
switch x := n.(type) {
case *parse.FieldNode:
if len(x.Ident) > 0 && x.Ident[0] == "Thinking" {
var mostRecentRange *parse.RangeNode
for i := len(ancestors) - 1; i >= 0; i-- {
if r, ok := ancestors[i].(*parse.RangeNode); ok {
mostRecentRange = r
break
}
}
if mostRecentRange == nil || !rangeUsesField(mostRecentRange, "Messages") {
return true
}
// TODO(drifkin): to be more robust, check that it's in the action
// part, not the `if`'s pipeline part. We do match on the nearest list
// that starts and ends with text nodes, which makes this not strictly
// necessary for our heuristic
// go up to the nearest ancestor that is a *parse.ListNode
for i := len(ancestors) - 1; i >= 0; i-- {
if l, ok := ancestors[i].(*parse.ListNode); ok {
firstNode := l.Nodes[0]
if t, ok := firstNode.(*parse.TextNode); ok {
openingTag = strings.TrimSpace(t.String())
}
lastNode := l.Nodes[len(l.Nodes)-1]
if t, ok := lastNode.(*parse.TextNode); ok {
closingTag = strings.TrimSpace(t.String())
}
break
}
}
}
}
return true
}
exitFn := func(n parse.Node) {
ancestors = ancestors[:len(ancestors)-1]
}
templateVisit(t.Root, enterFn, exitFn)
return openingTag, closingTag
}
// checks to see if the given field name is present in the pipeline of the given range node
func rangeUsesField(rangeNode *parse.RangeNode, field string) bool {
found := false
enterFn := func(n parse.Node) bool {
switch x := n.(type) {
case *parse.FieldNode:
if x.Ident[0] == field {
found = true
}
}
return true
}
templateVisit(rangeNode.BranchNode.Pipe, enterFn, nil)
return found
}

130
thinking/template_test.go Normal file
View File

@@ -0,0 +1,130 @@
package thinking
import (
"testing"
"text/template"
)
func TestInferThinkingTags(t *testing.T) {
cases := []struct {
desc string
tmplString string
wantOpeningTag string
wantClosingTag string
}{
{
desc: "basic",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ end }}
`,
wantOpeningTag: "<think>",
wantClosingTag: "</think>",
},
{
desc: "doubly nested range",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- range $j, $_ := .NotMessages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ end }}
{{ end }}
`,
wantOpeningTag: "",
wantClosingTag: "",
},
{
desc: "whitespace is trimmed",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
Some text before {{ .Thinking }} Some text after
{{ end }}
{{ end }}
`,
wantOpeningTag: "Some text before",
wantClosingTag: "Some text after",
},
{
desc: "qwen3",
tmplString: `
{{- if or .System .Tools .Thinking }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}
{{- if .Thinking }}
/think
{{- else }}
/no_think
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
`,
wantOpeningTag: "<think>",
wantClosingTag: "</think>",
},
}
for _, c := range cases {
tmpl := template.Must(template.New("test").Parse(c.tmplString))
openingTag, closingTag := InferTags(tmpl)
if openingTag != c.wantOpeningTag || closingTag != c.wantClosingTag {
t.Errorf("case %q: got (%q,%q), want (%q,%q)", c.desc, openingTag, closingTag, c.wantOpeningTag, c.wantClosingTag)
}
}
}

156
tools/template.go Normal file
View File

@@ -0,0 +1,156 @@
package tools
import (
"bytes"
"log/slog"
"slices"
"strings"
"text/template"
"text/template/parse"
)
// parseTag finds the tool calling tag from a Go template
// often <tool_call> [TOOL_CALL] or similar by finding the
// first text node after .ToolCalls and returning the content
// if no tag is found, return "{" to indicate that json objects
// should be attempted to be parsed as tool calls
func parseTag(tmpl *template.Template) string {
if tmpl == nil || tmpl.Tree == nil {
slog.Debug("template or tree is nil")
return "{"
}
tc := findToolCallNode(tmpl.Tree.Root.Nodes)
if tc == nil {
return "{"
}
tn := findTextNode(tc.List.Nodes)
if tn == nil {
return "{"
}
tag := string(tn.Text)
tag = strings.ReplaceAll(tag, "\r\n", "\n")
// avoid parsing { onwards as this may be a tool call
// however keep '{' as a prefix if there is no tag
// so that all json objects will be attempted to
// be parsed as tool calls
tag, _, _ = strings.Cut(tag, "{")
tag = strings.TrimSpace(tag)
if tag == "" {
tag = "{"
}
return tag
}
// findToolCallNode searches for and returns an IfNode with .ToolCalls
func findToolCallNode(nodes []parse.Node) *parse.IfNode {
isToolCallsNode := func(n *parse.IfNode) bool {
for _, cmd := range n.Pipe.Cmds {
for _, arg := range cmd.Args {
if field, ok := arg.(*parse.FieldNode); ok {
if slices.Contains(field.Ident, "ToolCalls") {
return true
}
}
}
}
return false
}
for _, node := range nodes {
switch n := node.(type) {
case *parse.IfNode:
if isToolCallsNode(n) {
return n
}
// Recursively search in nested IfNodes
if result := findToolCallNode(n.List.Nodes); result != nil {
return result
}
if n.ElseList != nil {
if result := findToolCallNode(n.ElseList.Nodes); result != nil {
return result
}
}
case *parse.ListNode:
if result := findToolCallNode(n.Nodes); result != nil {
return result
}
case *parse.RangeNode:
if result := findToolCallNode(n.List.Nodes); result != nil {
return result
}
if n.ElseList != nil {
if result := findToolCallNode(n.ElseList.Nodes); result != nil {
return result
}
}
case *parse.WithNode:
if result := findToolCallNode(n.List.Nodes); result != nil {
return result
}
if n.ElseList != nil {
if result := findToolCallNode(n.ElseList.Nodes); result != nil {
return result
}
}
}
}
return nil
}
// findTextNode does a depth-first search for the first text content in nodes,
// stopping at template constructs to avoid parsing text after the tool calls
func findTextNode(nodes []parse.Node) *parse.TextNode {
for _, node := range nodes {
switch n := node.(type) {
case *parse.TextNode:
// skip whitespace-only text nodes
if len(bytes.TrimSpace(n.Text)) == 0 {
continue
}
return n
case *parse.IfNode:
if text := findTextNode(n.List.Nodes); text != nil {
return text
}
if n.ElseList != nil {
if text := findTextNode(n.ElseList.Nodes); text != nil {
return text
}
}
return nil
case *parse.ListNode:
if text := findTextNode(n.Nodes); text != nil {
return text
}
case *parse.RangeNode:
if text := findTextNode(n.List.Nodes); text != nil {
return text
}
if n.ElseList != nil {
if text := findTextNode(n.ElseList.Nodes); text != nil {
return text
}
}
return nil
case *parse.WithNode:
if text := findTextNode(n.List.Nodes); text != nil {
return text
}
if n.ElseList != nil {
if text := findTextNode(n.ElseList.Nodes); text != nil {
return text
}
}
return nil
case *parse.ActionNode:
return nil
}
}
return nil
}

139
tools/template_test.go Normal file
View File

@@ -0,0 +1,139 @@
package tools
import (
"testing"
"text/template"
)
func TestParseTag(t *testing.T) {
cases := []struct {
name string
template string
want string
}{
{
name: "empty",
template: "",
want: "{",
},
{
name: "no tag",
template: "{{if .ToolCalls}}{{end}}",
want: "{",
},
{
name: "no tag with range",
template: "{{if .ToolCalls}}{{range .ToolCalls}}{{ . }}{{end}}{{end}}",
want: "{",
},
{
name: "tool call with json format",
template: "{{if .ToolCalls}}```json\n{{end}}",
want: "```json",
},
{
name: "square brackets",
template: "{{if .ToolCalls}}[{{range .ToolCalls}}{{ . }}{{end}}]{{end}}",
want: "[",
},
{
name: "square brackets with whitespace",
template: "{{if .ToolCalls}}\n [ {{range .ToolCalls}}{{ . }}{{end}}]{{end}}",
want: "[",
},
{
name: "tailing ]",
template: "{{if .ToolCalls}}{{range .ToolCalls}}{{ . }}{{end}}]{{end}}",
want: "{",
},
{
name: "whitespace only",
template: "{{if .ToolCalls}} {{range .ToolCalls}}{{ . }}{{end}}{{end}}",
want: "{",
},
{
name: "whitespace only in range",
template: "{{if .ToolCalls}}{{range .ToolCalls}}\n{{ . }}\n{{end}}{{end}}",
want: "{",
},
{
name: "json objects",
template: `{{if .ToolCalls}}{{range .ToolCalls}}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}{{end}}{{end}}`,
want: "{",
},
{
name: "json objects with whitespace",
template: "{{if .ToolCalls}}{{range .ToolCalls}}\n{\"name\": \"{{ .Function.Name }}\", \"arguments\": {{ .Function.Arguments }}}{{end}}{{end}}",
want: "{",
},
{
name: "json objects with CRLF",
template: "{{if .ToolCalls}}{{range .ToolCalls}}\r\n{\"name\": \"{{ .Function.Name }}\", \"arguments\": {{ .Function.Arguments }}}{{end}}{{end}}",
want: "{",
},
{
name: "json objects with whitespace before and after range",
template: "{{if .ToolCalls}}\n{{range .ToolCalls}}\n{\"name\": \"{{ .Function.Name }}\", \"arguments\": {{ .Function.Arguments }}}\r\n{{end}}\r\n{{end}}",
want: "{",
},
{
name: "before and after range",
template: "{{if .ToolCalls}}<|tool▁calls▁begin|>{{range .ToolCalls}}<|tool▁call▁begin|>functionget_current_weather\n```json\n{\"location\": \"Tokyo\"}\n```<|tool▁call▁end|>\n{{end}}<|tool▁calls▁end|>{{end}}",
want: "<|tool▁calls▁begin|>",
},
{
name: "after range",
template: "{{if .ToolCalls}}{{range .ToolCalls}}<tool_call>{\"name\": \"{{ .Function.Name }}\", \"arguments\": {{ .Function.Arguments }}}</tool_call>{{end}}{{end}}",
want: "<tool_call>",
},
{
name: "after range with leading whitespace before range",
template: "{{if .ToolCalls}}\n{{range .ToolCalls}}<tool_call>{\"name\": \"{{ .Function.Name }}\", \"arguments\": {{ .Function.Arguments }}}</tool_call>{{end}}{{end}}",
want: "<tool_call>",
},
{
name: "tool call in range with {",
template: `{{if .ToolCalls}}{{range .ToolCalls}}<tool_call>{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}<tool_call>{{end}}{{end}}`,
want: "<tool_call>",
},
{
name: "tool call with multiple text nodes",
template: "{{if .ToolCalls}}First text{{if .Something}}inner{{end}}Second text{{end}}",
want: "First text",
},
{
name: "action tag",
template: "{{if .ToolCalls}}Action: ```json{{end}}",
want: "Action: ```json",
},
{
name: "incomplete functools bracket",
template: "{{if .ToolCalls}}functools[{{end}}",
want: "functools[",
},
{
name: "uppercase tool call with incomplete bracket",
template: "{{if .ToolCalls}}[TOOL_CALL] [{{end}}",
want: "[TOOL_CALL] [",
},
{
name: "uppercase tool call with adjacent bracket",
template: "{{if .ToolCalls}}[TOOL_CALL][{{end}}",
want: "[TOOL_CALL][",
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
tmpl, err := template.New("test").Parse(tc.template)
if err != nil && tc.template != "" {
t.Fatalf("failed to parse template: %v", err)
}
got := parseTag(tmpl)
if got != tc.want {
t.Errorf("got text %q, want %q", got, tc.want)
}
})
}
}

View File

@@ -1,67 +0,0 @@
{{- if or .Tools .System }}<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
{{- if .Tools }}# Safety Preamble
The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.
# System Preamble
## Basic Rules
You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.
{{ if .System }}# User Preamble
{{ .System }}
{{- end }}
## Available Tools
Here is a list of tools that you have available to you:
{{- range .Tools }}
```python
def {{ .Function.Name }}(
{{- range $name, $property := .Function.Parameters.Properties }}{{ $name }}: {{ $property.Type }}, {{ end }}) -> List[Dict]:
"""{{ .Function.Description }}
{{- if .Function.Parameters.Properties }}
Args:
{{- range $name, $property := .Function.Parameters.Properties }}
{{ $name }} ({{ $property.Type }}): {{ $property.Description }}
{{- end }}
{{- end }}
"""
pass
```
{{- end }}
{{- else if .System }}{{ .System }}
{{- end }}<|END_OF_TURN_TOKEN|>
{{- end }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- continue }}
{{- end }}<|START_OF_TURN_TOKEN|>
{{- if eq .Role "user" }}<|USER_TOKEN|>{{ .Content }}
{{- else if eq .Role "assistant" }}<|CHATBOT_TOKEN|>
{{- if .Content }}{{ .Content }}
{{- else if .ToolCalls }}
Action: ```json
[
{{- range .ToolCalls }}
{
"tool_name": "{{ .Function.Name }}",
"parameters": {{ .Function.Arguments }}
}
{{- end }}
]```
{{ continue }}
{{ end }}
{{- else if eq .Role "tool" }}<|SYSTEM_TOKEN|><results>
{{ .Content }}</results>
{{- end }}<|END_OF_TURN_TOKEN|>
{{- end }}
{{- if .Tools }}<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:
```json
[
{
"tool_name": title of the tool in the specification,
"parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters
}
]```
{{- end }}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>

View File

@@ -1,39 +0,0 @@
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble
The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.
# System Preamble
## Basic Rules
You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.
# User Preamble
You are a knowledgeable assistant. You can answer questions and perform tasks.
## Available Tools
Here is a list of tools that you have available to you:
```python
def get_current_weather(format: string, location: string, ) -> List[Dict]:
"""Get the current weather
Args:
format (string): The temperature unit to use. Infer this from the user's location.
location (string): The city and state, e.g. San Francisco, CA
"""
pass
```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>What's the weather like today in Paris?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
Action: ```json
[
{
"tool_name": "get_current_weather",
"parameters": {"format":"celsius","location":"Paris, France"}
}
]```
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>
22</results><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>The current temperature in Paris, France is 22 degrees Celsius.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>What's the weather like today in San Francisco and Toronto?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:
```json
[
{
"tool_name": title of the tool in the specification,
"parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters
}
]```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>

View File

@@ -1,31 +0,0 @@
{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
{{- if .System }}
{{ .System }}
{{- end }}
In addition to plain text responses, you can chose to call one or more of the provided functions.
Use the following rule to decide when to call a function:
* if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so
* if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls
If you decide to call functions:
* prefix function calls with functools marker (no closing marker required)
* all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...]
* follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples
* respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0
* make sure you pick the right functions that match the user intent
Available functions as JSON spec:
{{- if .Tools }}
{{ .Tools }}
{{- end }}<|eot_id|>
{{- end }}
{{- range .Messages }}<|start_header_id|>
{{- if or (eq .Role "user") (eq .Role "assistant") (eq .Role "tool") }}{{ .Role }}
{{- end }}<|end_header_id|>
{{- if .Content }}{{ .Content }}
{{- else if .ToolCalls }} functools[
{{- range .ToolCalls }}{{ "{" }}"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}{{ "}" }}
{{- end }}]
{{- end }}<|eot_id|>
{{- end }}<|start_header_id|>assistant<|end_header_id|>

View File

@@ -1,17 +0,0 @@
<|start_header_id|>system<|end_header_id|>
You are a knowledgeable assistant. You can answer questions and perform tasks.
In addition to plain text responses, you can chose to call one or more of the provided functions.
Use the following rule to decide when to call a function:
* if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so
* if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls
If you decide to call functions:
* prefix function calls with functools marker (no closing marker required)
* all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...]
* follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples
* respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0
* make sure you pick the right functions that match the user intent
Available functions as JSON spec:
[{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}]<|eot_id|><|start_header_id|><|end_header_id|>You are a knowledgeable assistant. You can answer questions and perform tasks.<|eot_id|><|start_header_id|>user<|end_header_id|>What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|> functools[{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}]<|eot_id|><|start_header_id|>tool<|end_header_id|>22<|eot_id|><|start_header_id|>assistant<|end_header_id|>The current temperature in Paris, France is 22 degrees Celsius.<|eot_id|><|start_header_id|>user<|end_header_id|>What's the weather like today in San Francisco and Toronto?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

View File

@@ -1,43 +0,0 @@
{{- if .Messages }}
{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
{{ .System }}
{{- if .Tools }} You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{"name": <function-name>,"arguments": <args-dict>}
</tool_call>
Here are the available tools:
<tools>
{{- range .Tools }} {{ .Function }}
{{- end }} </tools>
{{- end }}
{{- end }}<|eot_id|>
{{- range .Messages }}
{{- if ne .Role "system" }}<|start_header_id|>{{ .Role }}<|end_header_id|>
{{ if eq .Role "user" }}{{ .Content }}
{{- else if eq .Role "assistant" }}
{{- if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{- end }}
</tool_call>
{{- end }}
{{- else if eq .Role "tool" }}<tool_response>
{{ .Content }}
</tool_response>
{{- end }}<|eot_id|>
{{- end }}
{{- end }}<|start_header_id|>assistant<|end_header_id|>
{{ else }}
{{ if .System }}<|start_header_id|>system<|end_header_id|>
{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
{{ end }}{{ .Response }}
{{- if .Response }}<|eot_id|>
{{- end }}

View File

@@ -1,24 +0,0 @@
<|start_header_id|>system<|end_header_id|>
You are a knowledgeable assistant. You can answer questions and perform tasks. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{"name": <function-name>,"arguments": <args-dict>}
</tool_call>
Here are the available tools:
<tools> {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}} </tools><|eot_id|><|start_header_id|>user<|end_header_id|>
What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
<tool_call>
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
</tool_call><|eot_id|><|start_header_id|>tool<|end_header_id|>
<tool_response>
22
</tool_response><|eot_id|><|start_header_id|>assistant<|end_header_id|>
The current temperature in Paris, France is 22 degrees Celsius.<|eot_id|><|start_header_id|>user<|end_header_id|>
What's the weather like today in San Francisco and Toronto?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

View File

@@ -1,44 +0,0 @@
<|start_header_id|>system<|end_header_id|>
Cutting Knowledge Date: December 2023
{{ if .System }}{{ .System }}
{{- end }}
{{- if .Tools }}When you receive a tool call response, use the output to format an answer to the orginal user question.
You are a helpful assistant with tool calling capabilities.
{{- end }}<|eot_id|>
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 }}
{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
{{- if and $.Tools $last }}
Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
{{ range $.Tools }}
{{- . }}
{{ end }}
{{ .Content }}<|eot_id|>
{{- else }}
{{ .Content }}<|eot_id|>
{{- end }}{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
{{ end }}
{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
{{- if .ToolCalls }}
{{ range .ToolCalls }}
{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
{{- else }}
{{ .Content }}
{{- end }}{{ if not $last }}<|eot_id|>{{ end }}
{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>
{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
{{ end }}
{{- end }}
{{- end }}

View File

@@ -1,24 +0,0 @@
<|start_header_id|>system<|end_header_id|>
Cutting Knowledge Date: December 2023
You are a knowledgeable assistant. You can answer questions and perform tasks.When you receive a tool call response, use the output to format an answer to the orginal user question.
You are a helpful assistant with tool calling capabilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
{"name": "get_current_weather", "parameters": {"format":"celsius","location":"Paris, France"}}<|eot_id|><|start_header_id|>ipython<|end_header_id|>
22<|eot_id|><|start_header_id|>assistant<|end_header_id|>
The current temperature in Paris, France is 22 degrees Celsius.<|eot_id|><|start_header_id|>user<|end_header_id|>
Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
What's the weather like today in San Francisco and Toronto?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

View File

@@ -1,39 +0,0 @@
[
{
"role": "system",
"content": "You are a knowledgeable assistant. You can answer questions and perform tasks."
},
{
"role": "user",
"content": "What's the weather like today in Paris?"
},
{
"role": "assistant",
"tool_calls": [
{
"id": "89a1e453-0bce-4de3-a456-c54bed09c520",
"type": "function",
"function": {
"name": "get_current_weather",
"arguments": {
"location": "Paris, France",
"format": "celsius"
}
}
}
]
},
{
"role": "tool",
"tool_call_id": "89a1e453-0bce-4de3-a456-c54bed09c520",
"content": "22"
},
{
"role": "assistant",
"content": "The current temperature in Paris, France is 22 degrees Celsius."
},
{
"role": "user",
"content": "What's the weather like today in San Francisco and Toronto?"
}
]

View File

@@ -1,15 +0,0 @@
{{- range $index, $_ := .Messages }}
{{- if eq .Role "user" }}
{{- if and (eq (len (slice $.Messages $index)) 1) $.Tools }}[AVAILABLE_TOOLS] {{ $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
{{ end }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }} {{ .Content }}</s>
{{- else if .ToolCalls }}[TOOL_CALLS] [
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{- end }}]</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
{{- end }}
{{- end }}

View File

@@ -1,3 +0,0 @@
[INST] What's the weather like today in Paris?[/INST][TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}]</s>[TOOL_RESULTS] {"content": 22}[/TOOL_RESULTS] The current temperature in Paris, France is 22 degrees Celsius.</s>[AVAILABLE_TOOLS] [{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}][/AVAILABLE_TOOLS][INST] You are a knowledgeable assistant. You can answer questions and perform tasks.
What's the weather like today in San Francisco and Toronto?[/INST]

View File

@@ -1,33 +0,0 @@
{{- if (or .Tools .System) }}<extra_id_0>System
{{ if .System }}{{ .System }}
{{ end }}
{{- if .Tools }}
{{- range .Tools }}<tool> {{ . }} </tool>{{ end }}
{{ end }}
{{- end }}
{{- range $i, $m := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<extra_id_1>User
{{ .Content }}
{{- if $last }}
<extra_id_1>Assistant
{{- end }}
{{ else if eq .Role "tool" }}<extra_id_1>Tool
{{ .Content }}
{{- if $last }}
<extra_id_1>Assistant
{{- end }}
{{ else if eq .Role "assistant" }}<extra_id_1>Assistant
{{- if .ToolCalls }}
{{ range .ToolCalls }}<toolcall> {"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}} </toolcall> {{ end }}
{{ else }}
{{ .Content }}
{{- if not $last }}
{{ end }}
{{- end }}
{{- end }}
{{- end }}

View File

@@ -1,18 +0,0 @@
<extra_id_0>System
You are a knowledgeable assistant. You can answer questions and perform tasks.
<tool> {"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}} </tool>
<extra_id_1>User
What's the weather like today in Paris?
<extra_id_1>Assistant
<toolcall> {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}} </toolcall>
<extra_id_1>Tool
22
<extra_id_1>Assistant
The current temperature in Paris, France is 22 degrees Celsius.
<extra_id_1>User
What's the weather like today in San Francisco and Toronto?
<extra_id_1>Assistant

View File

@@ -1,51 +0,0 @@
{{- if .Suffix }}<|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|>
{{- else if .Messages }}
{{- if or .System .Tools }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
{{- else }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}

View File

@@ -1,31 +0,0 @@
<|im_start|>system
You are a knowledgeable assistant. You can answer questions and perform tasks.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
What's the weather like today in Paris?<|im_end|>
<|im_start|>assistant
<tool_call>
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
22
</tool_response><|im_end|>
<|im_start|>assistant
The current temperature in Paris, France is 22 degrees Celsius.<|im_end|>
<|im_start|>user
What's the weather like today in San Francisco and Toronto?<|im_end|>
<|im_start|>assistant

View File

@@ -1,50 +0,0 @@
{{- if .Messages }}
{{- if or .System .Tools }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
{{- else }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}

View File

@@ -1,31 +0,0 @@
<|im_start|>system
You are a knowledgeable assistant. You can answer questions and perform tasks.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
What's the weather like today in Paris?<|im_end|>
<|im_start|>assistant
<tool_call>
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
22
</tool_response><|im_end|>
<|im_start|>assistant
The current temperature in Paris, France is 22 degrees Celsius.<|im_end|>
<|im_start|>user
What's the weather like today in San Francisco and Toronto?<|im_end|>
<|im_start|>assistant

View File

@@ -1,30 +0,0 @@
[
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
},
"format": {
"type": "string",
"enum": [
"celsius",
"fahrenheit"
],
"description": "The temperature unit to use. Infer this from the user's location."
}
},
"required": [
"location",
"format"
]
}
}
}
]

View File

@@ -1,45 +0,0 @@
{{- if .System }}{{ .System }}
{{ end }}
{{- range $i, $_ := .Messages }}
{{- if eq .Role "user" }}### Instruction:
{{- if and $.Tools (le (len (slice $.Messages $i)) 2) }}
[BEGIN OF TASK INSTRUCTION]
You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
If none of the functions can be used, point it out and refuse to answer.
If the given question lacks the parameters required by the function, also point it out.
[END OF TASK INSTRUCTION]
[BEGIN OF AVAILABLE TOOLS]
{{ $.Tools }}
[END OF AVAILABLE TOOLS]
[BEGIN OF FORMAT INSTRUCTION]
The output MUST strictly adhere to the following JSON format, and NO other text MUST be included.
The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.
```
{
"tool_calls": [
{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},
... (more tool calls as required)
]
}
```
[END OF FORMAT INSTRUCTION]
[BEGIN OF QUERY]
{{ .Content }}
[END OF QUERY]
{{ else }}
{{ .Content }}
{{ end }}
{{- else if .ToolCalls }}### Response:
{"tool_calls": [{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}{{ end }}]}
<|EOT|>
{{ else if eq .Role "assistant" }}### Response:
{{ .Content }}
<|EOT|>
{{ end }}
{{- end }}### Response:

View File

@@ -1,40 +0,0 @@
You are a knowledgeable assistant. You can answer questions and perform tasks.
### Instruction:
What's the weather like today in Paris?
### Response:
{"tool_calls": [{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}]}
<|EOT|>
### Response:
The current temperature in Paris, France is 22 degrees Celsius.
<|EOT|>
### Instruction:
[BEGIN OF TASK INSTRUCTION]
You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
If none of the functions can be used, point it out and refuse to answer.
If the given question lacks the parameters required by the function, also point it out.
[END OF TASK INSTRUCTION]
[BEGIN OF AVAILABLE TOOLS]
[{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}]
[END OF AVAILABLE TOOLS]
[BEGIN OF FORMAT INSTRUCTION]
The output MUST strictly adhere to the following JSON format, and NO other text MUST be included.
The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.
```
{
"tool_calls": [
{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},
... (more tool calls as required)
]
}
```
[END OF FORMAT INSTRUCTION]
[BEGIN OF QUERY]
What's the weather like today in San Francisco and Toronto?
[END OF QUERY]
### Response:

View File

@@ -1,253 +1,294 @@
package tools
import (
"bytes"
"encoding/json"
"errors"
"log/slog"
"strings"
gotmpl "text/template"
"text/template"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/template"
)
var (
errInvalidToolCall = errors.New("invalid tool call format")
errAccumulateMore = errors.New("need to accumulate more content")
type toolsState int
const (
toolsState_LookingForTag toolsState = iota
toolsState_ToolCalling
toolsState_Done
)
type Parser struct {
greedyParseJSON bool
prefix string
prefixFound bool
tmpl gotmpl.Template
sb strings.Builder
index int
name string
arguments string
tag string
tools []api.Tool
state toolsState
buffer []byte
n int
}
// parseJSONToolCalls attempts to parse a JSON string into a slice of ToolCalls.
//
// Parameters:
// - s: The string to parse
// - name: The field name from template that identifies the tool call name
// - arguments: The field name from template that identifies the tool call arguments
//
// Returns:
// - []api.ToolCall: The parsed tool calls if successful
// - error: ErrAccumulateMore if braces unbalanced, ErrInvalidToolCall if invalid, or nil if successful
func parseJSONToolCalls(s string, name, arguments string, prefix string) ([]api.ToolCall, error) {
// Check for balanced braces before attempting to parse
braceCount := 0
squareCount := 0
startIndex := -1
var rawToolCalls []string
s = strings.TrimSpace(s)
// Only track these if we don't have a prefix as it will be cut off from the prefix. Also track in the parseLeadingJSON case.
trackSquareBrackets := prefix == "" || !strings.HasSuffix(prefix, "[") || strings.HasPrefix(s, "[")
for i, c := range s {
switch c {
case '{':
braceCount++
if startIndex == -1 {
startIndex = i
}
case '}':
braceCount--
if braceCount == 0 {
rawToolCalls = append(rawToolCalls, s[startIndex:i+1])
startIndex = -1
}
case '[':
if trackSquareBrackets {
squareCount++
}
case ']':
if trackSquareBrackets {
squareCount--
}
}
// Negative means we have an extra closing brace/bracket
if braceCount < 0 || squareCount < 0 {
return nil, errInvalidToolCall
}
}
// If braces/brackets aren't balanced, need more input
if braceCount > 0 || squareCount > 0 {
return nil, errAccumulateMore
}
t := strings.TrimSpace(s)
if len(t) == 0 {
return nil, errAccumulateMore
}
// If the input is a single square bracket, it's not a valid tool call
if t[0] == '[' && len(t) == 1 {
return nil, errAccumulateMore
}
// Attempt full unmarshal of the JSON
var toolCalls []api.ToolCall
for _, rawToolCall := range rawToolCalls {
var resp map[string]any
if err := json.Unmarshal([]byte(rawToolCall), &resp); err != nil {
continue
}
// Collect nested objects that could contain tool calls
objs := collect(resp)
if len(objs) == 0 {
continue
}
// Extract tool calls from objects
for _, kv := range objs {
n, nok := kv[name].(string)
a, aok := kv[arguments].(map[string]any)
if nok && aok {
toolCalls = append(toolCalls, api.ToolCall{
Function: api.ToolCallFunction{
Name: n,
Arguments: a,
},
})
} else {
slog.Debug("No valid tool call found in object.", "object", kv)
}
}
}
// Valid JSON, no tool calls found
if len(toolCalls) == 0 {
slog.Debug("No valid tool calls found in any raw tool calls.", "rawToolCalls", rawToolCalls)
return nil, errInvalidToolCall
}
return toolCalls, nil
// NewParser creates a new tool call parser from a model's chat
// template and a list of provided tools.
func NewParser(tmpl *template.Template, tools []api.Tool) *Parser {
return NewParserWithTag(tools, parseTag(tmpl))
}
// checkPrefix processes a string to find and handle a prefix pattern.
//
// Returns:
// - The processed string with prefix removed if found
// - error: ErrAccumulateMore if prefix is incomplete, or nil if successful
func (p *Parser) checkPrefix(s string) (string, error) {
if s == "" || p.prefix == "" {
return s, nil
}
// Check for prefix at start of string
if cut, hasPrefix := strings.CutPrefix(s, p.prefix); hasPrefix {
// Found prefix at start - accumulate for potential tool
p.prefixFound = true
return cut, nil
}
// Check if prefix overlaps end of string
if idx := suffixOverlap(s, p.prefix); idx != -1 {
// Return everything except overlapping portion
p.sb.Reset()
p.sb.WriteString(s[idx:])
return s[:idx], errAccumulateMore
}
// Check if prefix appears in middle of string
if idx := strings.Index(s, p.prefix); idx != -1 {
// Save remainder starting at prefix for next pass
p.sb.Reset()
p.sb.WriteString(strings.TrimSpace(s[idx:]))
// Return everything before prefix
return s[:idx], errAccumulateMore
}
// No partial prefix found
return s, nil
}
// Add processes a string input to parse tool calls and content.
// It handles prefix detection and JSON parsing to extract tool calls.
//
// Returns:
// - tools: Any parsed tool calls
// - content: Non-tool call content
func (p *Parser) Add(s string) (tools []api.ToolCall, content string) {
p.sb.WriteString(s)
s = p.sb.String()
// Check for prefix pattern in input
s, err := p.checkPrefix(s)
if err != nil {
// Need more input to complete prefix
return nil, s
}
// Exit if prefix exists in template, greedy parsing is off, and prefix not found
if !p.greedyParseJSON && !p.prefixFound {
p.sb.Reset()
return nil, s
}
toolCalls, err := parseJSONToolCalls(s, p.name, p.arguments, p.prefix)
if err != nil {
if errors.Is(err, errAccumulateMore) {
return nil, ""
}
p.sb.Reset()
// Only do greedy JSON parsing if there is no prefix from template
if p.prefix != "" {
p.greedyParseJSON = false
}
if p.index != 0 && p.prefix == "" {
return nil, ""
}
if p.prefixFound {
// Drop tokens since prefix was found
return nil, ""
}
return nil, s
}
for _, tc := range toolCalls {
tc.Function.Index = p.index
p.index++
}
p.sb.Reset()
return toolCalls, ""
}
// NewParser creates a new tool call parser from a template. It extracts the tool call format,
// prefix, and field names from the template to use for parsing tool calls from model output.
//
// Returns an error if the template does not contain valid tool call formatting.
func NewParser(templateToProcess *gotmpl.Template) (*Parser, error) {
parsed, err := template.Parse(templateToProcess.Root.String())
if err != nil {
return nil, err
}
tt, err := toolTemplate(parsed)
if err != nil {
return nil, err
}
tp := toolPrefix(templateToProcess)
name, arguments, err := extractToolArgs(tt)
if err != nil {
return nil, err
}
func NewParserWithTag(tools []api.Tool, tag string) *Parser {
return &Parser{
tmpl: *tt,
sb: strings.Builder{},
prefix: tp,
greedyParseJSON: true,
name: name,
arguments: arguments,
}, nil
tag: tag,
tools: tools,
}
}
// Add processes a string input to parse tool calls and content that
// should be sent back to the user.
func (p *Parser) Add(s string) (calls []api.ToolCall, content string) {
if p.state == toolsState_Done {
return nil, s
}
p.buffer = append(p.buffer, s...)
if p.state == toolsState_LookingForTag {
i, found := p.findTag()
if i == -1 {
content = string(p.buffer)
p.buffer = []byte{}
} else {
content = string(p.buffer[:i])
p.buffer = p.buffer[i:]
}
// for models where { or [ are used as tool calling
// tags, we only support parsing tools if the first non-
// whitespace character is { or [
if p.tag == "{" || p.tag == "[" {
if strings.TrimSpace(content) != "" {
p.state = toolsState_Done
return nil, content + string(p.buffer)
}
}
if !found {
return nil, content
}
p.state = toolsState_ToolCalling
}
for {
call := p.parseToolCall()
if call == nil {
break
}
calls = append(calls, *call)
}
if p.done() {
p.state = toolsState_Done
content = string(p.buffer)
p.buffer = []byte{}
}
return calls, content
}
// findTag searches the buffer to find and handle a tool calling tag
// returning true if the tag was found and false otherwise, and
// a string content signaling any content that should be sent back to the user
func (p *Parser) findTag() (int, bool) {
// First check for complete substring anywhere in s
if i := bytes.Index(p.buffer, []byte(p.tag)); i > -1 {
return i, true
}
// Then check for partial suffix overlap
max := min(len(p.buffer), len(p.tag))
for i := max; i > 0; i-- {
if bytes.HasSuffix(p.buffer, []byte(p.tag[:i])) {
return len(p.buffer) - i, false
}
}
return -1, false
}
// parseToolCall finds the next complete tool call in the buffer
// incrementing n and advancing the buffer.
func (p *Parser) parseToolCall() *api.ToolCall {
var tool *api.Tool
var end int = len(p.buffer)
var i int
// find tool name
for _, t := range p.tools {
n := t.Function.Name
if i = bytes.Index(p.buffer, []byte(n)); i != -1 {
if i+len(n) < end {
tool = &t
end = i + len(n)
}
}
}
if tool == nil {
return nil
}
// only look for arguments if the tool has parameters
args := map[string]any{}
if len(tool.Function.Parameters.Properties) > 0 {
if args, i = p.findArguments(*tool); args == nil {
return nil
}
if i > end {
end = i
}
}
tc := &api.ToolCall{
Function: api.ToolCallFunction{
Name: tool.Function.Name,
Arguments: args,
Index: p.n,
},
}
p.n++
p.buffer = p.buffer[end:]
return tc
}
// findArguments returns the first object that appears to be
// arguments for the provided tool, returning nil
func (p *Parser) findArguments(tool api.Tool) (map[string]any, int) {
if len(p.buffer) == 0 {
return nil, 0
}
// no arguments to parse
if len(tool.Function.Parameters.Properties) == 0 {
return nil, 0
}
var braces int
var start int = -1
var end int
var object []byte
// find any outer json object
for i, c := range p.buffer {
if c == '{' {
braces++
if start == -1 {
start = i
}
}
if c == '}' {
if start != -1 {
braces--
if braces == 0 {
end = i + 1
object = p.buffer[start:end]
break
}
}
}
}
if braces > 0 {
return nil, 0
}
var data map[string]any
// not valid json
if err := json.Unmarshal(object, &data); err != nil {
return nil, 0
}
var find func(obj any) map[string]any
find = func(obj any) map[string]any {
switch obj := obj.(type) {
case map[string]any:
found := true
for key := range obj {
if _, exists := tool.Function.Parameters.Properties[key]; !exists {
found = false
break
}
}
if found {
return obj
}
for _, value := range obj {
if result := find(value); result != nil {
return result
}
}
case []any:
for _, item := range obj {
if result := find(item); result != nil {
return result
}
}
}
return nil
}
result := find(data)
if result != nil {
return result, end
}
return nil, 0
}
// done checks if the parser is done parsing by looking
// for closing tag. currently only } and ] are supported
// for closing tags as {} or [] pairs may not always
// represent tool calls and we need to send the content back
func (p *Parser) done() bool {
var open, close rune
switch p.tag {
case "{":
open, close = '{', '}'
case "[":
open, close = '[', ']'
default:
return false
}
var count int
for _, c := range p.buffer {
if c == byte(open) {
count++
} else if c == byte(close) {
count--
if count == 0 {
return true
}
}
}
return false
}
// Content returns any remaining content that
// should be sent to the user. This should be the empty string
// string unless the tag is { or [ and a tool call was not found
func (p *Parser) Content() string {
if p.n > 0 {
return ""
}
if p.tag == "{" || p.tag == "[" {
return string(p.buffer)
}
return ""
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,227 +0,0 @@
package tools
import (
"bytes"
"encoding/json"
"errors"
"log/slog"
"slices"
"strings"
gotmpl "text/template"
"text/template/parse"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/template"
)
// extractToolCallsFormat traverses a template AST to find text that follows a ".ToolCalls" condition.
// It walks the template nodes looking for if-statements containing ".ToolCalls" and extracts any
// immediate text nodes that follow. This is used to identify tool call prefixes and formatting.
//
// Returns:
// - string: The extracted text following the first ".ToolCalls" condition found
// - bool: Whether a ".ToolCalls" condition was found in the template
func extractToolCallsFormat(tmpl *gotmpl.Template) (string, bool) {
if tmpl == nil || tmpl.Tree == nil {
slog.Debug("template or tree is nil")
return "", false
}
var result string
var found bool
var walk func(nodes []parse.Node)
walk = func(nodes []parse.Node) {
for _, node := range nodes {
if found {
return
}
switch n := node.(type) {
case *parse.IfNode:
if isToolCallsNode(n) {
// Collect immediate TextNode(s) at start of IfNode's list
var sb strings.Builder
for _, innerNode := range n.List.Nodes {
if tn, ok := innerNode.(*parse.TextNode); ok {
sb.Write(tn.Text)
} else {
// Stop at first non-text node
break
}
}
result = sb.String()
found = true
return
}
// Recurse into child nodes
walk(n.List.Nodes)
if n.ElseList != nil {
walk(n.ElseList.Nodes)
}
case *parse.ListNode:
walk(n.Nodes)
case *parse.RangeNode:
walk(n.List.Nodes)
if n.ElseList != nil {
walk(n.ElseList.Nodes)
}
case *parse.WithNode:
walk(n.List.Nodes)
if n.ElseList != nil {
walk(n.ElseList.Nodes)
}
default:
// Continue to next node
continue
}
}
}
walk(tmpl.Tree.Root.Nodes)
return result, found
}
// isToolCallsNode detects if a node's condition includes ".ToolCalls"
func isToolCallsNode(n *parse.IfNode) bool {
for _, cmd := range n.Pipe.Cmds {
for _, arg := range cmd.Args {
if field, ok := arg.(*parse.FieldNode); ok {
if slices.Contains(field.Ident, "ToolCalls") {
return true
}
}
}
}
return false
}
func toolPrefix(tmpl *gotmpl.Template) string {
tokenText, ok := extractToolCallsFormat(tmpl)
if !ok {
return ""
}
tokenText = strings.TrimSpace(tokenText)
tokenText = strings.ReplaceAll(tokenText, "\r", "")
tokenText = strings.ReplaceAll(tokenText, "\n", " ")
return tokenText
}
// toolTemplate creates a subtree from the node that ranges over .ToolCalls
//
// Returns:
// - *gotmpl.Template: The subtree containing the .ToolCalls range
// - error: Error if parsing failed
func toolTemplate(t *template.Template) (*gotmpl.Template, error) {
tmpl := t.Subtree(func(n parse.Node) bool {
if t, ok := n.(*parse.RangeNode); ok {
return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
}
return false
})
if tmpl == nil {
return nil, errors.New("failed to find tool template")
}
return tmpl, nil
}
// suffixOverlap returns the index in s where the longest suffix overlap with prefix begins
//
// Returns:
// - int: The starting index in s where the suffix overlap begins
func suffixOverlap(s, prefix string) int {
max := min(len(prefix), len(s))
for i := max; i > 0; i-- {
if strings.HasSuffix(s, prefix[:i]) {
return len(s) - i
}
}
return -1
}
// extractToolArgs executes a template with a known tool call format to extract the name and arguments
//
// Returns:
// - string: The name of the tool call
// - string: The arguments of the tool call
// - error: Error if parsing failed
func extractToolArgs(tmpl *gotmpl.Template) (name, arguments string, err error) {
var b bytes.Buffer
if err := tmpl.Execute(&b, map[string][]api.ToolCall{
"ToolCalls": {
{
Function: api.ToolCallFunction{
Name: "@@name@@",
Arguments: api.ToolCallFunctionArguments{
"@@argument@@": 1,
},
},
},
},
}); err != nil {
return "", "", err
}
var obj any
err = json.Unmarshal(b.Bytes(), &obj)
if err != nil {
return "", "", err
}
var objs []map[string]any
switch v := obj.(type) {
case map[string]any:
objs = []map[string]any{v}
case []map[string]any:
objs = v
case []any:
objs = collect(v)
}
if len(objs) == 0 {
return "", "", errors.New("no template objects found")
}
// find the keys that correspond to the name and arguments fields
for k, v := range objs[0] {
switch v.(type) {
case string:
name = k
case map[string]any:
arguments = k
}
}
if name == "" || arguments == "" {
slog.Debug("missing required fields in tool call template", "name", name, "arguments", arguments)
return "", "", errors.New("missing required fields in tool call template")
}
return name, arguments, nil
}
// collect recursively traverses an object to collect all nested maps
//
// Returns:
// - []map[string]any: A slice of all nested maps found in the object
func collect(obj any) []map[string]any {
var all []map[string]any
switch o := obj.(type) {
case map[string]any:
all = append(all, o)
for _, v := range o {
all = append(all, collect(v)...)
}
case []any:
for _, v := range o {
all = append(all, collect(v)...)
}
default:
return nil
}
return all
}

View File

@@ -1,464 +0,0 @@
package tools
import (
"testing"
gotmpl "text/template"
"github.com/ollama/ollama/template"
)
func TestExtractToolCallsFormat(t *testing.T) {
cases := []struct {
name string
template string
want string
found bool
}{
{
name: "nil template",
template: "",
want: "",
found: false,
},
{
name: "basic tool call with text",
template: "{{if .ToolCalls}}Hello world{{end}}",
want: "Hello world",
found: true,
},
{
name: "tool call with json format",
template: "{{if .ToolCalls}}```json\n{{end}}",
want: "```json\n",
found: true,
},
{
name: "tool call in range",
template: "{{range .ToolCalls}}tool: {{.}}{{end}}",
want: "",
found: false,
},
{
name: "tool call with multiple text nodes",
template: "{{if .ToolCalls}}First text{{if .Something}}inner{{end}}Second text{{end}}",
want: "First text",
found: true,
},
{
name: "nested if without tool calls",
template: "{{if .Something}}{{if .OtherThing}}text{{end}}{{end}}",
want: "",
found: false,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
tmpl, err := gotmpl.New("test").Parse(tc.template)
if err != nil && tc.template != "" {
t.Fatalf("failed to parse template: %v", err)
}
got, found := extractToolCallsFormat(tmpl)
if got != tc.want {
t.Errorf("got text %q, want %q", got, tc.want)
}
if found != tc.found {
t.Errorf("got found %v, want %v", found, tc.found)
}
})
}
}
func TestToolPrefix(t *testing.T) {
cases := []struct {
name string
template string
want string
}{
{
name: "basic tool call with action prefix",
template: "{{if .ToolCalls}}Action: ```json{{end}}",
want: "Action: ```json",
},
{
name: "incomplete functools bracket",
template: "{{if .ToolCalls}}functools[{{end}}",
want: "functools[",
},
{
name: "tool call with angle brackets",
template: "{{if .ToolCalls}}Hello, world! <tool_call>{{end}}",
want: "Hello, world! <tool_call>",
},
{
name: "multiple tool call formats",
template: "{{if .ToolCalls}}[tool_call] <tool_call>{{end}}",
want: "[tool_call] <tool_call>",
},
{
name: "single angle bracket tool call",
template: "{{if .ToolCalls}}<tool_call>{{end}}",
want: "<tool_call>",
},
{
name: "incomplete angle bracket after tool call",
template: "{{if .ToolCalls}}[tool_call] <{{end}}",
want: "[tool_call] <",
},
{
name: "angle bracket prefix with tool call",
template: "{{if .ToolCalls}}> <tool_call>{{end}}",
want: "> <tool_call>",
},
{
name: "uppercase tool call with incomplete bracket",
template: "{{if .ToolCalls}}[TOOL_CALL] [{{end}}",
want: "[TOOL_CALL] [",
},
{
name: "uppercase tool call with adjacent bracket",
template: "{{if .ToolCalls}}[TOOL_CALL][{{end}}",
want: "[TOOL_CALL][",
},
{
name: "tool call with pipe delimiters",
template: "{{if .ToolCalls}}<|tool_call|>{{end}}",
want: "<|tool_call|>",
},
{
name: "tool with no prefix",
template: "{{if .ToolCalls}}{{end}}",
want: "",
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
tmpl, err := gotmpl.New("test").Parse(tt.template)
if err != nil {
t.Fatalf("failed to parse template: %v", err)
}
got := toolPrefix(tmpl)
if got != tt.want {
t.Errorf("ToolToken(%q) = %q; want %q", tt.template, got, tt.want)
}
})
}
}
func TestToolTemplate(t *testing.T) {
cases := []struct {
name string
template string
want bool
}{
{
name: "basic tool call range",
template: "{{range .ToolCalls}}test{{end}}",
want: true,
},
{
name: "no tool calls",
template: "{{range .Other}}test{{end}}",
want: false,
},
{
name: "nested tool calls",
template: "{{range .Outer}}{{range .ToolCalls}}test{{end}}{{end}}",
want: true,
},
{
name: "empty template",
template: "",
want: false,
},
{
name: "tool calls in if statement",
template: "{{if .ToolCalls}}test{{end}}",
want: false,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
tmpl, err := gotmpl.New("test").Parse(tt.template)
if err != nil {
t.Fatalf("failed to parse template: %v", err)
}
parsed, err := template.Parse(tmpl.Root.String())
if err != nil {
t.Fatalf("failed to parse template: %v", err)
}
_, err = toolTemplate(parsed)
if err != nil && tt.want {
t.Errorf("toolTemplate() = %v; want %v", err, tt.want)
}
})
}
}
func TestSuffixOverlap(t *testing.T) {
cases := []struct {
name string
s string
d string
want int
}{
{
name: "no overlap",
s: "hello world",
d: "<tool_call>",
want: -1,
},
{
name: "full overlap",
s: "<tool_call>",
d: "<tool_call>",
want: 0,
},
{
name: "partial overlap",
s: "text <tool_call>",
d: "<tool_call>",
want: 5,
},
{
name: "delimiter longer than string",
s: "<tool>",
d: "<tool_call>",
want: -1,
},
{
name: "empty string",
s: "",
d: "<tool_call>",
want: -1,
},
{
name: "empty delimiter",
s: "<tool_call>",
d: "",
want: -1,
},
{
name: "single char overlap",
s: "test<",
d: "<tool_call>",
want: 4,
},
{
name: "partial tool call",
s: "hello <tool_",
d: "<tool_call>",
want: 6,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
got := suffixOverlap(tt.s, tt.d)
if got != tt.want {
t.Errorf("suffixOverlap(%q, %q) = %d; want %d", tt.s, tt.d, got, tt.want)
}
})
}
}
func TestExtractToolArgs(t *testing.T) {
cases := []struct {
name string
template string
want string
ok bool
}{
{
name: "basic tool call with text after",
template: `{{if .ToolCalls}}tool response{{end}}`,
want: "tool response",
ok: true,
},
{
name: "tool call with mixed content after",
template: `{{if .ToolCalls}}<tool_call>{{.Something}}{{end}}`,
want: "<tool_call>",
ok: true,
},
{
name: "tool call with no text after",
template: `{{if .ToolCalls}}{{.Something}}{{end}}`,
want: "",
ok: true,
},
{
name: "nested tool call",
template: `{{if .Something}}{{if .ToolCalls}}[TOOL_CALL]{{end}}{{end}}`,
want: "[TOOL_CALL]",
ok: true,
},
{
name: "no tool calls",
template: `{{if .Something}}no tools here{{end}}`,
want: "",
ok: false,
},
{
name: "empty template",
template: ``,
want: "",
ok: false,
},
{
name: "multiple tool calls sections",
template: `{{if .ToolCalls}}first{{end}}{{if .ToolCalls}}second{{end}}`,
want: "first",
ok: true,
},
{
name: "range over tool calls",
template: `{{if .ToolCalls}}{{range .ToolCalls}}tool{{end}}{{end}}`,
want: "",
ok: true,
},
{
name: "tool calls with pipe delimiters",
template: `{{if .ToolCalls}}<|tool|>{{end}}`,
want: "<|tool|>",
ok: true,
},
{
name: "tool calls with nested template",
template: `{{if .ToolCalls}}{{template "tool" .}}{{end}}`,
want: "",
ok: true,
},
{
name: "tool calls with whitespace variations",
template: `{{if .ToolCalls}} tool {{end}}`,
want: " tool ",
ok: true,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
tmpl, err := gotmpl.New("test").Parse(tt.template)
if err != nil {
t.Fatalf("failed to parse template: %v", err)
}
got, ok := extractToolCallsFormat(tmpl)
if got != tt.want {
t.Errorf("TextAfterToolCalls() got = %q, want %q", got, tt.want)
}
if ok != tt.ok {
t.Errorf("TextAfterToolCalls() ok = %v, want %v", ok, tt.ok)
}
})
}
}
func TestCollect(t *testing.T) {
cases := []struct {
name string
obj any
want []map[string]any
}{
{
name: "simple map",
obj: map[string]any{
"key": "value",
},
want: []map[string]any{
{"key": "value"},
},
},
{
name: "nested map",
obj: map[string]any{
"outer": map[string]any{
"inner": "value",
},
},
want: []map[string]any{
{"outer": map[string]any{"inner": "value"}},
{"inner": "value"},
},
},
{
name: "array of maps",
obj: []any{
map[string]any{"key1": "val1"},
map[string]any{"key2": "val2"},
},
want: []map[string]any{
{"key1": "val1"},
{"key2": "val2"},
},
},
{
name: "deeply nested",
obj: map[string]any{
"l1": map[string]any{
"l2": map[string]any{
"l3": "value",
},
},
},
want: []map[string]any{
{"l1": map[string]any{"l2": map[string]any{"l3": "value"}}},
{"l2": map[string]any{"l3": "value"}},
{"l3": "value"},
},
},
{
name: "non-map value",
obj: "string",
want: nil,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
got := collect(tt.obj)
if len(got) != len(tt.want) {
t.Errorf("collect() got %d maps, want %d", len(got), len(tt.want))
return
}
// Compare each map in the result
for i := range tt.want {
if !mapsEqual(got[i], tt.want[i]) {
t.Errorf("collect() map[%d] = %v, want %v", i, got[i], tt.want[i])
}
}
})
}
}
// mapsEqual compares two maps for deep equality
func mapsEqual(m1, m2 map[string]any) bool {
if len(m1) != len(m2) {
return false
}
for k, v1 := range m1 {
v2, ok := m2[k]
if !ok {
return false
}
switch val1 := v1.(type) {
case map[string]any:
val2, ok := v2.(map[string]any)
if !ok || !mapsEqual(val1, val2) {
return false
}
default:
if v1 != v2 {
return false
}
}
}
return true
}