Merge branch 'ollama:main' into main

This commit is contained in:
likelovewant
2025-08-03 08:22:07 +08:00
committed by GitHub
28 changed files with 472 additions and 385 deletions

View File

@@ -65,7 +65,7 @@ continuation of the sentence:
Examples:
llm/backend/mlx: support the llama architecture
CONTRIBUTING: provide clairity on good commit messages, and bad
CONTRIBUTING: provide clarity on good commit messages, and bad
Bad Examples:

View File

@@ -432,6 +432,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [GPTranslate](https://github.com/philberndt/GPTranslate) (A fast and lightweight, AI powered desktop translation application written with Rust and Tauri. Features real-time translation with OpenAI/Azure/Ollama.)
- [ollama launcher](https://github.com/NGC13009/ollama-launcher) (A launcher for Ollama, aiming to provide users with convenient functions such as ollama server launching, management, or configuration.)
- [ai-hub](https://github.com/Aj-Seven/ai-hub) (AI Hub supports multiple models via API keys and Chat support via Ollama API.)
- [Mayan EDMS](https://gitlab.com/mayan-edms/mayan-edms) (Open source document management system to organize, tag, search, and automate your files with powerful Ollama driven workflows.)
### Cloud

View File

@@ -1137,6 +1137,14 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
if errors.Is(err, context.Canceled) {
return nil, nil
}
// this error should ideally be wrapped properly by the client
if strings.Contains(err.Error(), "upstream error") {
p.StopAndClear()
fmt.Println("An error occurred while processing your message. Please try again.")
fmt.Println()
return nil, nil
}
return nil, err
}

View File

@@ -385,18 +385,21 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
case "modelfile":
fmt.Println(resp.Modelfile)
case "parameters":
fmt.Println("Model defined parameters:")
if resp.Parameters == "" {
fmt.Println("No parameters were specified for this model.")
fmt.Println(" No additional parameters were specified for this model.")
} else {
if len(opts.Options) > 0 {
fmt.Println("User defined parameters:")
for k, v := range opts.Options {
fmt.Printf("%-*s %v\n", 30, k, v)
}
fmt.Println()
for _, l := range strings.Split(resp.Parameters, "\n") {
fmt.Printf(" %s\n", l)
}
fmt.Println("Model defined parameters:")
fmt.Println(resp.Parameters)
}
fmt.Println()
if len(opts.Options) > 0 {
fmt.Println("User defined parameters:")
for k, v := range opts.Options {
fmt.Printf(" %-*s %v\n", 30, k, v)
}
fmt.Println()
}
case "system":
switch {

View File

@@ -11,14 +11,13 @@ import (
"io"
"io/fs"
"log/slog"
"maps"
"os"
"path/filepath"
"slices"
"strings"
"testing"
"golang.org/x/exp/maps"
"github.com/ollama/ollama/fs/ggml"
)
@@ -137,9 +136,7 @@ func TestConvertModel(t *testing.T) {
t.Fatal(err)
}
keys := maps.Keys(expect)
slices.Sort(keys)
for _, k := range keys {
for _, k := range slices.Sorted(maps.Keys(expect)) {
if v, ok := actual[k]; !ok {
t.Errorf("missing %s", k)
} else if v != expect[k] {
@@ -343,9 +340,7 @@ func TestConvertAdapter(t *testing.T) {
actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
keys := maps.Keys(c.Expected)
slices.Sort(keys)
for _, k := range keys {
for _, k := range slices.Sorted(maps.Keys(c.Expected)) {
if v, ok := actual[k]; !ok {
t.Errorf("missing %s", k)
} else if v != c.Expected[k] {

View File

@@ -8,12 +8,12 @@ import (
"fmt"
"io"
"io/fs"
"maps"
"slices"
"strings"
"github.com/d4l3k/go-bfloat16"
"github.com/x448/float16"
"golang.org/x/exp/maps"
)
type safetensorMetadata struct {
@@ -46,8 +46,7 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T
return nil, err
}
keys := maps.Keys(headers)
slices.Sort(keys)
keys := slices.Sorted(maps.Keys(headers))
names := make(map[string]struct{}, len(keys))

View File

@@ -8,11 +8,10 @@ import (
"fmt"
"io/fs"
"log/slog"
"maps"
"os"
"slices"
"strings"
"golang.org/x/exp/maps"
)
const (
@@ -260,11 +259,8 @@ func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
tokens[token.ID] = token
}
keys := maps.Keys(tokens)
slices.Sort(keys)
v := Vocabulary{Model: "gpt2"}
for _, k := range keys {
for _, k := range slices.Sorted(maps.Keys(tokens)) {
token := tokens[k]
v.Tokens = append(v.Tokens, token.Content)
v.Scores = append(v.Scores, float32(token.ID))

View File

@@ -58,7 +58,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
driverMajor, driverMinor, err := AMDDriverVersion()
if err != nil {
// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
slog.Warn("ollama recommends running the https://www.amd.com/en/support/linux-drivers", "error", err)
slog.Warn("ollama recommends running the https://www.amd.com/en/support/download/linux-drivers.html", "error", err)
}
// Determine if the user has already pre-selected which GPUs to look at, then ignore the others

View File

@@ -500,11 +500,11 @@ The `message` object has the following fields:
- `thinking`: (for thinking models) the model's thinking process
- `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
- `tool_calls` (optional): a list of tools in JSON that the model wants to use
- `tool_name` (optional): add the name of the tool that was executed to inform the model of the result
- `tool_name` (optional): add the name of the tool that was executed to inform the model of the result
Advanced parameters (optional):
- `format`: the format to return a response in. Format can be `json` or a JSON schema.
- `format`: the format to return a response in. Format can be `json` or a JSON schema.
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)

View File

@@ -118,7 +118,7 @@ To run tests, use `go test`:
go test ./...
```
> NOTE: In rare cirumstances, you may need to change a package using the new
> NOTE: In rare circumstances, you may need to change a package using the new
> "synctest" package in go1.24.
>
> If you do not have the "synctest" package enabled, you will not see build or

View File

@@ -16,7 +16,7 @@ curl -fsSL https://ollama.com/install.sh | sh
Download and extract the package:
```shell
curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz
curl -LO https://ollama.com/download/ollama-linux-amd64.tgz
sudo tar -C /usr -xzf ollama-linux-amd64.tgz
```

View File

@@ -72,7 +72,7 @@ client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
# Define the schema for the response
class FriendInfo(BaseModel):
name: str
age: int
age: int
is_available: bool
class FriendList(BaseModel):

View File

@@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
On **Linux** systems with systemd, the logs can be found with this command:
```shell
journalctl -u ollama --no-pager --follow --pager-end
journalctl -u ollama --no-pager --follow --pager-end
```
When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
@@ -23,7 +23,7 @@ docker logs <container-name>
If manually running `ollama serve` in a terminal, the logs will be on that terminal.
When you run Ollama on **Windows**, there are a few different locations. You can view them in the explorer window by hitting `<cmd>+R` and type in:
- `explorer %LOCALAPPDATA%\Ollama` to view logs. The most recent server logs will be in `server.log` and older logs will be in `server-#.log`
- `explorer %LOCALAPPDATA%\Ollama` to view logs. The most recent server logs will be in `server.log` and older logs will be in `server-#.log`
- `explorer %LOCALAPPDATA%\Programs\Ollama` to browse the binaries (The installer adds this to your user PATH)
- `explorer %HOMEPATH%\.ollama` to browse where models and configuration is stored
@@ -38,7 +38,7 @@ Join the [Discord](https://discord.gg/ollama) for help interpreting the logs.
## LLM libraries
Ollama includes multiple LLM libraries compiled for different GPUs and CPU vector features. Ollama tries to pick the best one based on the capabilities of your system. If this autodetection has problems, or you run into other problems (e.g. crashes in your GPU) you can workaround this by forcing a specific LLM library. `cpu_avx2` will perform the best, followed by `cpu_avx` an the slowest but most compatible is `cpu`. Rosetta emulation under MacOS will work with the `cpu` library.
Ollama includes multiple LLM libraries compiled for different GPUs and CPU vector features. Ollama tries to pick the best one based on the capabilities of your system. If this autodetection has problems, or you run into other problems (e.g. crashes in your GPU) you can workaround this by forcing a specific LLM library. `cpu_avx2` will perform the best, followed by `cpu_avx` and the slowest but most compatible is `cpu`. Rosetta emulation under MacOS will work with the `cpu` library.
In the server log, you will see a message that looks something like this (varies from release to release):
@@ -97,7 +97,7 @@ If none of those resolve the problem, gather additional information and file an
On linux, AMD GPU access typically requires `video` and/or `render` group membership to access the `/dev/kfd` device. If permissions are not set up correctly, Ollama will detect this and report an error in the server log.
When running in a container, in some Linux distributions and container runtimes, the ollama process may be unable to access the GPU. Use `ls -lnd /dev/kfd /dev/dri /dev/dri/*` on the host system to determine the **numeric** group IDs on your system, and pass additional `--group-add ...` arguments to the container so it can access the required devices. For example, in the following output `crw-rw---- 1 0 44 226, 0 Sep 16 16:55 /dev/dri/card0` the group ID column is `44`
When running in a container, in some Linux distributions and container runtimes, the ollama process may be unable to access the GPU. Use `ls -lnd /dev/kfd /dev/dri /dev/dri/*` on the host system to determine the **numeric** group IDs on your system, and pass additional `--group-add ...` arguments to the container so it can access the required devices. For example, in the following output `crw-rw---- 1 0 44 226, 0 Sep 16 16:55 /dev/dri/card0` the group ID column is `44`
If you are experiencing problems getting Ollama to correctly discover or use your GPU for inference, the following may help isolate the failure.
- `AMD_LOG_LEVEL=3` Enable info log levels in the AMD HIP/ROCm libraries. This can help show more detailed error codes that can help troubleshoot problems

2
go.mod
View File

@@ -71,7 +71,7 @@ require (
github.com/ugorji/go/codec v1.2.12 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.36.0
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
golang.org/x/net v0.38.0 // indirect
golang.org/x/sys v0.31.0
golang.org/x/term v0.30.0

View File

@@ -19,12 +19,22 @@ type shiftFn func(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, e
// The tensors are of shape embed dim, kv heads, batch size
// The mask is of shape history size, batch size
type Causal struct {
DType ml.DType
windowSize int32
chunkSize int32
DType ml.DType
// swaWindowSize is the number of tokens that will be included in the mask
// during attention operations. swaMemorySize is the number of tokens that
// will be retained in memory for partial prefix caching. Set to math.MaxInt32
// for unlimited or if sliding window attention is not being used.
swaWindowSize int32
swaMemorySize int32
chunkSize int32
opts CausalOptions
// maxBatch is the largest batch that we might receive
maxBatch int
// config controls mostly backend-specific optimizations
config *ml.CacheConfig
@@ -85,32 +95,41 @@ type cellRange struct {
func NewCausalCache(shift shiftFn) *Causal {
return &Causal{
windowSize: math.MaxInt32,
shiftFn: shift,
ctxs: make(map[int]ml.Context),
keys: make(map[int]ml.Tensor),
values: make(map[int]ml.Tensor),
shiftFn: shift,
ctxs: make(map[int]ml.Context),
keys: make(map[int]ml.Tensor),
values: make(map[int]ml.Tensor),
}
}
func NewSWACache(windowSize int32, shift shiftFn) *Causal {
return &Causal{
windowSize: windowSize,
shiftFn: shift,
ctxs: make(map[int]ml.Context),
keys: make(map[int]ml.Tensor),
values: make(map[int]ml.Tensor),
swaWindowSize: windowSize,
shiftFn: shift,
ctxs: make(map[int]ml.Context),
keys: make(map[int]ml.Tensor),
values: make(map[int]ml.Tensor),
}
}
func NewSWAMemCache(windowSize int32, memorySize int32, shift shiftFn) *Causal {
return &Causal{
swaWindowSize: windowSize,
swaMemorySize: memorySize,
shiftFn: shift,
ctxs: make(map[int]ml.Context),
keys: make(map[int]ml.Tensor),
values: make(map[int]ml.Tensor),
}
}
func NewChunkedAttentionCache(chunkSize int32, shift shiftFn) *Causal {
return &Causal{
windowSize: math.MaxInt32,
chunkSize: chunkSize,
shiftFn: shift,
ctxs: make(map[int]ml.Context),
keys: make(map[int]ml.Tensor),
values: make(map[int]ml.Tensor),
chunkSize: chunkSize,
shiftFn: shift,
ctxs: make(map[int]ml.Context),
keys: make(map[int]ml.Tensor),
values: make(map[int]ml.Tensor),
}
}
@@ -135,11 +154,25 @@ func (c *Causal) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity
c.config.MaskDType = ml.DTypeF32
}
if c.swaWindowSize == 0 {
c.swaWindowSize = math.MaxInt32
}
if c.swaMemorySize == 0 {
c.swaMemorySize = c.swaWindowSize
}
if int(c.swaMemorySize) > capacity {
c.swaMemorySize = math.MaxInt32
}
if c.swaMemorySize < c.swaWindowSize {
panic(fmt.Errorf("sliding window memory (%v) must be at least as large as the window (%v)", c.swaMemorySize, c.swaWindowSize))
}
var cacheSize int
if c.windowSize == math.MaxInt32 || capacity < int(c.windowSize) {
if c.swaMemorySize == math.MaxInt32 {
cacheSize = maxSequences * capacity
} else {
cacheSize = (maxSequences * int(c.windowSize)) + maxBatch
cacheSize = (maxSequences * int(c.swaMemorySize)) + maxBatch
}
cacheSize = roundUp(cacheSize, c.config.CachePadding)
c.cells = make([]cacheCell, cacheSize)
@@ -147,6 +180,7 @@ func (c *Causal) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity
c.DType = dtype
c.cellRanges = make(map[int]cellRange)
c.backend = backend
c.maxBatch = maxBatch
}
func (c *Causal) SetConfig(config ml.CacheConfig) {
@@ -183,7 +217,6 @@ func (c *Causal) StartForward(ctx ml.Context, batch input.Batch, reserve bool) e
return err
}
c.curCellRange = newRange()
for i, pos := range batch.Positions {
seq := batch.Sequences[i]
@@ -194,19 +227,12 @@ func (c *Causal) StartForward(ctx ml.Context, batch input.Batch, reserve bool) e
seqRange = newRange()
}
if c.curLoc+i > seqRange.max {
seqRange.max = c.curLoc + i
}
if seqRange.max > c.curCellRange.max {
c.curCellRange.max = seqRange.max
}
seqRange.min = min(seqRange.min, c.curLoc+i)
c.curCellRange.min = min(c.curCellRange.min, c.curLoc+i)
seqRange.max = max(seqRange.max, c.curLoc+i)
c.curCellRange.max = max(c.curCellRange.max, c.curLoc+i)
if c.curLoc+i < seqRange.min {
seqRange.min = c.curLoc + i
}
if seqRange.min < c.curCellRange.min {
c.curCellRange.min = seqRange.min
}
c.cellRanges[seq] = seqRange
}
} else {
@@ -248,7 +274,16 @@ func (c *Causal) findStartLoc() (int, error) {
}
func (c *Causal) updateSlidingWindow() {
if c.windowSize == math.MaxInt32 {
c.curCellRange = newRange()
if c.swaMemorySize == math.MaxInt32 {
for _, seq := range c.curSequences {
if seqRange, ok := c.cellRanges[seq]; ok {
c.curCellRange.min = min(c.curCellRange.min, seqRange.min)
c.curCellRange.max = max(c.curCellRange.max, seqRange.max)
}
}
return
}
@@ -278,12 +313,16 @@ func (c *Causal) updateSlidingWindow() {
for i := oldRange.min; i <= oldRange.max; i++ {
if slices.Contains(c.cells[i].sequences, seq) {
if c.cells[i].pos < pos-c.windowSize {
if c.cells[i].pos < pos-c.swaMemorySize {
c.cells[i].sequences = slices.DeleteFunc(c.cells[i].sequences, func(s int) bool { return s == seq })
} else {
newRange.min = min(newRange.min, i)
newRange.max = max(newRange.max, i)
}
if c.cells[i].pos >= pos-c.swaWindowSize {
c.curCellRange.min = min(c.curCellRange.min, i)
c.curCellRange.max = max(c.curCellRange.max, i)
}
}
}
@@ -323,7 +362,7 @@ func (c *Causal) buildMask(ctx ml.Context) ml.Tensor {
if !slices.Contains(c.cells[j].sequences, c.curSequences[i]) ||
(enabled && c.cells[j].pos > c.curPositions[i]) ||
c.chunkSize > 0 && c.cells[j].pos < c.curPositions[i]-c.curPositions[i]%c.chunkSize ||
c.cells[j].pos < c.curPositions[i]-c.windowSize {
c.cells[j].pos < c.curPositions[i]-c.swaWindowSize {
mask[i*length+(j-c.curCellRange.min)] = float32(math.Inf(-1))
}
}
@@ -481,6 +520,8 @@ func (c *Causal) defrag() {
c.cellRanges[seq] = seqRange
}
c.updateSlidingWindow()
}
func (c *Causal) SetLayer(layer int) {
@@ -606,7 +647,7 @@ func (c *Causal) CopyPrefix(srcSeq, dstSeq int, len int32) {
}
func (c *Causal) CanResume(seq int, pos int32) bool {
if c.windowSize == math.MaxInt32 {
if c.swaMemorySize == math.MaxInt32 {
return true
}
@@ -628,8 +669,8 @@ func (c *Causal) CanResume(seq int, pos int32) bool {
return false
}
lastWindowStart := max(0, last-c.windowSize)
posWindowStart := max(0, pos-c.windowSize)
lastWindowStart := max(0, last-c.swaMemorySize)
posWindowStart := max(0, pos-c.swaWindowSize)
return posWindowStart >= lastWindowStart
}
@@ -639,48 +680,64 @@ func (c *Causal) shift(seq int, beginIndex, offset int32) error {
return ErrNotSupported
}
ctx := c.backend.NewContext()
defer ctx.Close()
seqRange := c.cellRanges[seq]
size := seqRange.max - seqRange.min + 1
offsets := make([]int32, size)
for i := range offsets {
cell := c.cells[seqRange.min+i]
for start := seqRange.min; start <= seqRange.max; start += c.maxBatch {
size := min(seqRange.max-start+1, c.maxBatch)
offsets := make([]int32, size)
if slices.Contains(cell.sequences, seq) && cell.pos >= beginIndex {
offsets[i] = offset
var batchFirst, batchLast int
batchFirst = -1
for i := range offsets {
cell := c.cells[start+i]
if slices.Contains(cell.sequences, seq) && cell.pos >= beginIndex {
offsets[i] = offset
if batchFirst < 0 {
batchFirst = i
}
batchLast = i
}
}
}
kShift := ctx.Input().FromIntSlice(offsets, len(offsets))
for i, key := range c.keys {
if key == nil {
if batchFirst < 0 {
continue
}
kHeadDim := key.Dim(0)
numKVHeads := key.Dim(1)
rowSize := key.Stride(2)
offsets = offsets[batchFirst : batchLast+1]
key = key.View(ctx, rowSize*seqRange.min,
kHeadDim, key.Stride(1),
numKVHeads, key.Stride(2),
size,
)
ctx := c.backend.NewContext()
kShift := ctx.Input().FromIntSlice(offsets, len(offsets))
roped, err := c.shiftFn(ctx, i, key, kShift)
if err != nil {
return err
for i, key := range c.keys {
if key == nil {
continue
}
kHeadDim := key.Dim(0)
numKVHeads := key.Dim(1)
rowSize := key.Stride(2)
key = key.View(ctx, rowSize*(start+batchFirst),
kHeadDim, key.Stride(1),
numKVHeads, key.Stride(2),
len(offsets),
)
roped, err := c.shiftFn(ctx, i, key, kShift)
if err != nil {
ctx.Close()
return err
}
ctx.Forward(roped.Copy(ctx, key))
}
ctx.Forward(roped.Copy(ctx, key))
ctx.Compute()
ctx.Close()
}
ctx.Compute()
return nil
}

View File

@@ -60,6 +60,8 @@ func TestSWA(t *testing.T) {
cache.Init(backend, ml.DTypeF16, 1, 16, 16)
x := float32(math.Inf(-1))
tests := []testCase{
{
name: "FirstBatch",
@@ -69,7 +71,12 @@ func TestSWA(t *testing.T) {
pos: []int32{0, 1, 2, 3},
expected: []float32{1, 2, 3, 4},
expectedShape: []int{1, 1, 4},
expectedMask: []float32{0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0},
expectedMask: []float32{
0, x, x, x,
0, 0, x, x,
x, 0, 0, x,
x, x, 0, 0,
},
},
{
name: "SecondBatch",
@@ -79,7 +86,53 @@ func TestSWA(t *testing.T) {
pos: []int32{4, 5},
expected: []float32{5, 6, 3, 4},
expectedShape: []int{1, 1, 4},
expectedMask: []float32{0, float32(math.Inf(-1)), float32(math.Inf(-1)), 0, 0, 0, float32(math.Inf(-1)), float32(math.Inf(-1))},
expectedMask: []float32{
0, x, x, 0,
0, 0, x, x,
},
},
}
testCache(t, backend, cache, tests)
}
func TestSWAMem(t *testing.T) {
backend := &testBackend{}
cache := NewSWAMemCache(1, 3, nil)
defer cache.Close()
cache.Init(backend, ml.DTypeF16, 1, 16, 16)
x := float32(math.Inf(-1))
tests := []testCase{
{
name: "FirstBatch",
in: []float32{1, 2, 3, 4},
inShape: []int{1, 1, 4},
seqs: []int{0, 0, 0, 0},
pos: []int32{0, 1, 2, 3},
expected: []float32{1, 2, 3, 4},
expectedShape: []int{1, 1, 4},
expectedMask: []float32{
0, x, x, x,
0, 0, x, x,
x, 0, 0, x,
x, x, 0, 0,
},
},
{
name: "SecondBatch",
in: []float32{5, 6},
inShape: []int{1, 1, 2},
seqs: []int{0, 0},
pos: []int32{4, 5},
expected: []float32{4, 5, 6},
expectedShape: []int{1, 1, 3},
expectedMask: []float32{
0, 0, x,
x, 0, 0,
},
},
}
@@ -437,6 +490,70 @@ func TestCanResume(t *testing.T) {
}
}
func TestCanResumeSWAMem(t *testing.T) {
backend := &testBackend{}
windowSize := int32(4)
memSize := int32(5)
cache := NewSWAMemCache(windowSize, memSize, nil)
defer cache.Close()
cache.Init(backend, ml.DTypeF16, 1, 16, 16)
context := backend.NewContext()
defer context.Close()
err := cache.StartForward(context, input.Batch{
Positions: []int32{0, 1, 2, 3, 4, 5},
Sequences: []int{0, 0, 0, 0, 0, 0},
}, false)
if err != nil {
t.Fatalf("StartForward failed: %v", err)
}
cache.SetLayer(0)
tensor := context.FromFloatSlice([]float32{1, 2, 3, 4, 5, 6}, 1, 1, 6)
cache.Put(context, tensor, tensor)
// shift window by adding position 6
err = cache.StartForward(context, input.Batch{
Positions: []int32{6, 7},
Sequences: []int{0, 0},
}, false)
if err != nil {
t.Fatalf("StartForward failed: %v", err)
}
cache.SetLayer(0)
tensor = context.FromFloatSlice([]float32{7, 8}, 1, 1, 2)
cache.Put(context, tensor, tensor)
// only the latest position has overlapping windows
if cache.CanResume(0, 0) {
t.Errorf("after shift: CanResume(0, 0) = true, want false (outside window)")
}
if cache.CanResume(0, 1) {
t.Errorf("after shift: CanResume(0, 1) = true, want false (outside window)")
}
if cache.CanResume(0, 2) {
t.Errorf("after shift: CanResume(0, 2) = true, want false (outside window)")
}
if cache.CanResume(0, 3) {
t.Errorf("after shift: CanResume(0, 3) = true, want false (outside window)")
}
if cache.CanResume(0, 4) {
t.Errorf("after shift: CanResume(0, 4) = true, want false (outside window)")
}
if cache.CanResume(0, 5) {
t.Errorf("after shift: CanResume(0, 5) = true, want false (outside window)")
}
if !cache.CanResume(0, 6) {
t.Errorf("after shift: CanResume(0, 6) = false, want true (inside window)")
}
if !cache.CanResume(0, 7) {
t.Errorf("after shift: CanResume(0, 7) = false, want true (latest position)")
}
}
type testBackend struct {
ml.Backend
}

View File

@@ -16,10 +16,10 @@ ggml-ci
2 files changed, 67 insertions(+), 14 deletions(-)
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
index ee4f2dcb..f20f5615 100644
index a9eeebc6..110c9ece 100644
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -489,6 +489,7 @@ enum ggml_metal_kernel_type {
@@ -489,6 +489,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
GGML_METAL_KERNEL_TYPE_COS,
GGML_METAL_KERNEL_TYPE_NEG,
GGML_METAL_KERNEL_TYPE_SUM_ROWS,
@@ -27,7 +27,7 @@ index ee4f2dcb..f20f5615 100644
GGML_METAL_KERNEL_TYPE_POOL_2D_AVG_F32,
GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32,
GGML_METAL_KERNEL_TYPE_ARGMAX,
@@ -1436,6 +1437,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
@@ -1436,6 +1437,7 @@ @implementation GGMLMetalClass
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_COS, cos, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_NEG, neg, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);

View File

@@ -52,7 +52,7 @@ index 64fb4ff4..5b9a0fe3 100644
static __device__ __forceinline__ float warp_reduce_max(float x) {
#pragma unroll
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 4c829153..9e64e5ae 100644
index d6960174..2b9fabf4 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -35,6 +35,7 @@

View File

@@ -0,0 +1,50 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Oliver Simons <osimons@nvidia.com>
Date: Tue, 22 Jul 2025 11:02:28 +0200
Subject: [PATCH] Enable CUDA Graphs for gemma3n.
Similar to
https://github.com/ggml-org/llama.cpp/pull/14741,
though ollama has a slightly different model graph
than llama.cpp which requires different workaround
checks.
---
ggml/src/ggml-cuda/ggml-cuda.cu | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 2b9fabf4..28ccf4be 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2474,6 +2474,9 @@ static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cud
// Loop over nodes in GGML graph to obtain info needed for CUDA graph
cuda_ctx->cuda_graph->cpy_dest_ptrs.clear();
+ const std::string gemma3n_per_layer_proj_src1_name = " (reshaped)";
+ const std::string gemma3n_node_name = "node_";
+
for (int i = 0; i < cgraph->n_nodes; i++) {
ggml_tensor * node = cgraph->nodes[i];
@@ -2495,12 +2498,17 @@ static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cud
#endif
}
- if (node->op == GGML_OP_ADD && node->src[1] && node->src[1]->ne[1] > 1) {
- // disable CUDA graphs for batch size > 1 for now.
- // Changes in batch size or context size can cause changes to the grid size of some kernels.
+ // workarounds to exclude Gemma3n's `project_per_layer_input` operation from the batch-size heuristic, specific to ollama's implementation of gemma3n
+ // number of layers is different for per_layer_proj between gemma3n:2b and gemma3n:4b, which is why we don't check that value here
+ if (node->op == GGML_OP_ADD && node->src[1] && node->src[1]->ne[1] > 1 && !(node->ne[0] == 256
+ && node->ne[2] == 1
+ && node->ne[3] == 1
+ && node->src[0] ? std::string(node->src[0]->name).find(gemma3n_node_name) != std::string::npos : false
+ && node->src[1] ? node->src[1]->name == gemma3n_per_layer_proj_src1_name : false)) {
+ // Generally, changes in batch size or context size can cause changes to the grid size of some kernels.
use_cuda_graph = false;
#ifndef NDEBUG
- GGML_LOG_DEBUG("%s: disabling CUDA graphs due to batch size > 1 [%s] [%ld %ld %ld %ld]\n", __func__, node->name, node->ne[0], node->ne[1], node->ne[2], node->ne[3]);
+ GGML_LOG_INFO("%s: disabling CUDA graphs due to batch size > 1 [%s] [%ld %ld %ld %ld]\n", __func__, node->name, node->ne[0], node->ne[1], node->ne[2], node->ne[3]);
#endif
}

View File

@@ -0,0 +1,27 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Wed, 30 Jul 2025 08:43:46 -0700
Subject: [PATCH] BF16 macos version guard
Only enable BF16 on supported MacOS versions (v14+)
---
ggml/src/ggml-metal/ggml-metal.m | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
index 110c9ece..ab46f6e3 100644
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -89,7 +89,11 @@
ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
#if defined(GGML_METAL_USE_BF16)
- ctx->use_bfloat = ctx->has_bfloat;
+ if (@available(macOS 14.0, *)) {
+ ctx->use_bfloat = ctx->has_bfloat;
+ } else {
+ ctx->use_bfloat = false;
+ }
#else
ctx->use_bfloat = false;
#endif

View File

@@ -2474,6 +2474,9 @@ static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cud
// Loop over nodes in GGML graph to obtain info needed for CUDA graph
cuda_ctx->cuda_graph->cpy_dest_ptrs.clear();
const std::string gemma3n_per_layer_proj_src1_name = " (reshaped)";
const std::string gemma3n_node_name = "node_";
for (int i = 0; i < cgraph->n_nodes; i++) {
ggml_tensor * node = cgraph->nodes[i];
@@ -2495,12 +2498,17 @@ static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cud
#endif
}
if (node->op == GGML_OP_ADD && node->src[1] && node->src[1]->ne[1] > 1) {
// disable CUDA graphs for batch size > 1 for now.
// Changes in batch size or context size can cause changes to the grid size of some kernels.
// workarounds to exclude Gemma3n's `project_per_layer_input` operation from the batch-size heuristic, specific to ollama's implementation of gemma3n
// number of layers is different for per_layer_proj between gemma3n:2b and gemma3n:4b, which is why we don't check that value here
if (node->op == GGML_OP_ADD && node->src[1] && node->src[1]->ne[1] > 1 && !(node->ne[0] == 256
&& node->ne[2] == 1
&& node->ne[3] == 1
&& node->src[0] ? std::string(node->src[0]->name).find(gemma3n_node_name) != std::string::npos : false
&& node->src[1] ? node->src[1]->name == gemma3n_per_layer_proj_src1_name : false)) {
// Generally, changes in batch size or context size can cause changes to the grid size of some kernels.
use_cuda_graph = false;
#ifndef NDEBUG
GGML_LOG_DEBUG("%s: disabling CUDA graphs due to batch size > 1 [%s] [%ld %ld %ld %ld]\n", __func__, node->name, node->ne[0], node->ne[1], node->ne[2], node->ne[3]);
GGML_LOG_INFO("%s: disabling CUDA graphs due to batch size > 1 [%s] [%ld %ld %ld %ld]\n", __func__, node->name, node->ne[0], node->ne[1], node->ne[2], node->ne[3]);
#endif
}

View File

@@ -89,7 +89,11 @@ static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_dev
ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
#if defined(GGML_METAL_USE_BF16)
ctx->use_bfloat = ctx->has_bfloat;
if (@available(macOS 14.0, *)) {
ctx->use_bfloat = ctx->has_bfloat;
} else {
ctx->use_bfloat = false;
}
#else
ctx->use_bfloat = false;
#endif

View File

@@ -203,10 +203,9 @@ func (a AltUp) Predict(ctx ml.Context, hiddenStates ml.Tensor, opts *TextOptions
coefficients := a.PredictionCoefficient.Forward(ctx, modalities)
coefficients = coefficients.Reshape(ctx, opts.altupInputs, opts.altupInputs, coefficients.Dim(1), coefficients.Dim(2))
hiddenStates = hiddenStates.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
predictions := coefficients.Mulmat(ctx, hiddenStates)
predictions = predictions.Add(ctx, hiddenStates)
return predictions.Permute(ctx, 2, 0, 1, 3).Contiguous(ctx)
predictions := coefficients.Mulmat(ctx, hiddenStates.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx))
predictions = predictions.Permute(ctx, 2, 0, 1, 3).Contiguous(ctx)
return predictions.Add(ctx, hiddenStates)
}
func (a AltUp) Correct(ctx ml.Context, predictions, activated, one ml.Tensor, opts *TextOptions) ml.Tensor {

View File

@@ -842,8 +842,11 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
}
resp.Parameters = strings.Join(params, "\n")
for k, v := range req.Options {
if _, ok := req.Options[k]; ok {
if len(req.Options) > 0 {
if m.Options == nil {
m.Options = make(map[string]any)
}
for k, v := range req.Options {
m.Options[k] = v
}
}

View File

@@ -16,6 +16,7 @@ import (
"os"
"path/filepath"
"reflect"
"slices"
"sort"
"strings"
"testing"
@@ -82,19 +83,6 @@ func createTestFile(t *testing.T, name string) (string, string) {
return f.Name(), digest
}
// equalStringSlices checks if two slices of strings are equal.
func equalStringSlices(a, b []string) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
type panicTransport struct{}
func (t *panicTransport) RoundTrip(r *http.Request) (*http.Response, error) {
@@ -447,7 +435,7 @@ func TestRoutes(t *testing.T) {
"stop \"foo\"",
"top_p 0.9",
}
if !equalStringSlices(params, expectedParams) {
if !slices.Equal(params, expectedParams) {
t.Errorf("expected parameters %v, got %v", expectedParams, params)
}
paramCount, ok := showResp.ModelInfo["general.parameter_count"].(float64)

View File

@@ -6,6 +6,7 @@ import (
"encoding/json"
"errors"
"io"
"maps"
"math"
"slices"
"strings"
@@ -14,7 +15,6 @@ import (
"text/template/parse"
"github.com/agnivade/levenshtein"
"golang.org/x/exp/maps"
"github.com/ollama/ollama/api"
)
@@ -157,9 +157,7 @@ func (t *Template) Vars() []string {
set[strings.ToLower(n)] = struct{}{}
}
vars = maps.Keys(set)
slices.Sort(vars)
return vars
return slices.Sorted(maps.Keys(set))
}
type Values struct {

View File

@@ -120,16 +120,14 @@ func (p *Parser) parseToolCall() *api.ToolCall {
return nil
}
// only look for arguments after the tool name if the tool has parameters
// TODO (jmorganca): while probably uncommon, this doesn't support
// parsing arguments before the tool name, which may be needed in the future
args := map[string]any{}
if len(tool.Function.Parameters.Properties) > 0 {
var i int
if args, i = findArguments(*tool, p.buffer[end:]); args == nil {
return nil
var args map[string]any
if found, i := findArguments(p.buffer); found == nil {
return nil
} else {
args = found
if i > end {
end = i
}
end += i
}
tc := &api.ToolCall{
@@ -217,93 +215,70 @@ func findTool(tools []api.Tool, buf []byte) (*api.Tool, int) {
// objects for functions that have all-optional parameters
// e.g. `{"name": "get_conditions", "arguments": {}}` will work but
// `{"name": "get_conditions"}` will not currently work
func findArguments(tool api.Tool, buffer []byte) (map[string]any, int) {
func findArguments(buffer []byte) (map[string]any, int) {
if len(buffer) == 0 {
return nil, 0
}
var braces int
var start int = -1
var end int
var object []byte
// find any outer json object
for i, c := range buffer {
if c == '{' {
braces++
if start == -1 {
if braces == 0 {
start = i
}
}
braces++
} else if c == '}' && braces > 0 {
braces--
if braces == 0 && start != -1 {
object := buffer[start : i+1]
if c == '}' {
if start != -1 {
braces--
if braces == 0 {
end = i + 1
object = buffer[start:end]
break
var data map[string]any
if err := json.Unmarshal(object, &data); err != nil {
start = -1
continue
}
}
}
}
if braces > 0 {
return nil, 0
}
var data map[string]any
if err := json.Unmarshal(object, &data); err != nil {
return nil, 0
}
var find func(obj any) map[string]any
find = func(obj any) map[string]any {
switch obj := obj.(type) {
case map[string]any:
valid := true
// check if all keys in the object exist in the tool's parameters
for key := range obj {
if _, exists := tool.Function.Parameters.Properties[key]; !exists {
valid = false
break
}
}
// check for required parameters
// TODO (jmorganca): this should error instead of silently failing
if valid {
for _, required := range tool.Function.Parameters.Required {
if _, exists := obj[required]; !exists {
valid = false
break
var findObject func(obj map[string]any) (map[string]any, bool)
findObject = func(obj map[string]any) (map[string]any, bool) {
if _, hasName := obj["name"]; hasName {
if args, ok := obj["arguments"].(map[string]any); ok {
return args, true
}
if args, ok := obj["parameters"].(map[string]any); ok {
return args, true
}
return nil, true
}
}
}
if valid {
return obj
}
for _, v := range obj {
switch child := v.(type) {
case map[string]any:
if result, found := findObject(child); found {
return result, true
}
case []any:
for _, item := range child {
if childObj, ok := item.(map[string]any); ok {
if result, found := findObject(childObj); found {
return result, true
}
}
}
}
}
for _, value := range obj {
if result := find(value); result != nil {
return result
return nil, false
}
}
case []any:
for _, item := range obj {
if result := find(item); result != nil {
return result
if args, found := findObject(data); found {
return args, i
}
return data, i
}
}
return nil
}
result := find(data)
if result != nil {
return result, end
}
return nil, 0

View File

@@ -227,13 +227,6 @@ func TestParser(t *testing.T) {
},
},
},
{
name: "invalid arguments",
inputs: []string{`<tool_call>{"name": "get_conditions", "arguments": {"city": "San Francisco"}}</tool_call>`},
content: "",
tmpl: qwen,
calls: nil,
},
{
name: "empty args",
inputs: []string{`<tool_call>{"name": "get_conditions", "arguments": {}}</tool_call>`},
@@ -249,13 +242,6 @@ func TestParser(t *testing.T) {
},
},
},
{
name: "missing required args",
inputs: []string{`<tool_call>{"name": "get_temperature", "arguments": {}}</tool_call>`},
content: "",
tmpl: qwen,
calls: nil,
},
{
name: "text before tool call",
inputs: []string{`Let me check the weather. <tool_call>{"name": "get_temperature", "arguments": {"city": "New York"}}</tool_call>`},
@@ -273,21 +259,6 @@ func TestParser(t *testing.T) {
},
},
},
{
name: "qwen no args tool call",
inputs: []string{`Let me say hello to the user. I'll use the say_hello tool <tool_call>{"name": "say_hello"}</tool_call>`},
content: "Let me say hello to the user. I'll use the say_hello tool ",
tmpl: qwen,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "say_hello",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
},
{
name: "qwen no args with text",
inputs: []string{"Let me say hello to the user. I'll use the say_hello tool. "},
@@ -521,52 +492,6 @@ func TestParser(t *testing.T) {
content: "for { fmt.Println(\"hello\") }",
tmpl: json,
},
{
name: "json no args tool call",
inputs: []string{
"{\"name\": \"say_hello\"}",
},
content: "",
tmpl: json,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "say_hello",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
},
{
name: "json no args no tool call",
inputs: []string{
"I'll use the say_hello tool to say hello to the user.",
},
content: "I'll use the say_hello tool to say hello to the user.",
tmpl: json,
calls: nil,
},
// TODO (jmorganca): this is a false positive, we should
// not be parsing this as a tool call
{
name: "json no args false positive",
inputs: []string{
`{say_hello!!!}`,
},
content: "",
tmpl: json,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "say_hello",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
},
{
name: "list multiple",
inputs: []string{
@@ -684,26 +609,6 @@ func TestParser(t *testing.T) {
tmpl: list,
calls: nil,
},
{
name: "list with no arguments",
inputs: []string{
"[",
"{",
"\"name\": \"say_hello\"",
"}",
},
content: "",
tmpl: list,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "say_hello",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
},
{
name: "tool name with collision",
inputs: []string{
@@ -711,7 +616,7 @@ func TestParser(t *testing.T) {
"{",
"\"name\": \"say_hello",
"_world\",",
"}",
"\"arguments\": {}}",
"}",
},
content: "",
@@ -733,13 +638,13 @@ func TestParser(t *testing.T) {
"{",
"\"name\": \"say_hello",
"_world\",",
"}",
"\"arguments\": {}}",
"</tool_call>",
"<tool_call>",
"{",
"\"name\": \"say_hello",
"\",",
"}",
"\"arguments\": {}}",
"</tool_call>",
},
content: "",
@@ -773,7 +678,7 @@ func TestParser(t *testing.T) {
{
name: "tool name with collision non streaming multiple",
inputs: []string{
`<tool_call>{"name": "say_hello"}</tool_call><tool_call>{"name": "say_hello_world"}`,
`<tool_call>{"name": "say_hello", "arguments": {}}</tool_call><tool_call>{"name": "say_hello_world", "arguments": {}}`,
},
content: "",
tmpl: qwen,
@@ -797,7 +702,7 @@ func TestParser(t *testing.T) {
{
name: "tool name with collision non streaming shorter",
inputs: []string{
`<tool_call>{"name": "say_hello"}</tool_call>`,
`<tool_call>{"name": "say_hello", "arguments": {}}</tool_call>`,
},
content: "",
tmpl: qwen,
@@ -814,7 +719,7 @@ func TestParser(t *testing.T) {
{
name: "tool name with collision non streaming longer",
inputs: []string{
`<tool_call>{"name": "say_hello_world"}</tool_call>`,
`<tool_call>{"name": "say_hello_world", "arguments": {}}</tool_call>`,
},
content: "",
tmpl: qwen,
@@ -871,6 +776,26 @@ func TestParser(t *testing.T) {
},
},
},
{
name: "args before name",
inputs: []string{
`<tool_call>{"arguments": {"a": "5", "b": "10"}, "name": "add"}</tool_call>`,
},
content: "",
tmpl: qwen,
calls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Index: 0,
Name: "add",
Arguments: api.ToolCallFunctionArguments{
"a": "5",
"b": "10",
},
},
},
},
},
}
for _, tt := range tests {
@@ -1167,75 +1092,25 @@ func TestFindTag(t *testing.T) {
}
func TestFindArguments(t *testing.T) {
tool := api.Tool{
Type: "function",
Function: api.ToolFunction{
Name: "get_temperature",
Description: "Retrieve the temperature for a given location",
Parameters: struct {
Type string `json:"type"`
Defs any `json:"$defs,omitempty"`
Items any `json:"items,omitempty"`
Required []string `json:"required"`
Properties map[string]struct {
Type api.PropertyType `json:"type"`
Items any `json:"items,omitempty"`
Description string `json:"description"`
Enum []any `json:"enum,omitempty"`
} `json:"properties"`
}{
Type: "object",
Properties: map[string]struct {
Type api.PropertyType `json:"type"`
Items any `json:"items,omitempty"`
Description string `json:"description"`
Enum []any `json:"enum,omitempty"`
}{
"format": {
Type: api.PropertyType{"string"},
Description: "The format to return the temperature in",
Enum: []any{"fahrenheit", "celsius"},
},
"location": {
Type: api.PropertyType{"string"},
Description: "The location to get the temperature for",
},
},
},
},
}
tool2 := api.Tool{
Type: "function",
Function: api.ToolFunction{
Name: "say_hello",
Description: "Say hello to the user",
},
}
tests := []struct {
name string
buffer []byte
want map[string]any
tool api.Tool
}{
{
name: "empty string",
buffer: []byte{},
want: nil,
tool: tool,
},
{
name: "whitespace only",
buffer: []byte(" \n\t "),
want: nil,
tool: tool,
},
{
name: "unbalanced braces - missing closing",
buffer: []byte(`{"format": "fahrenheit", "location": "San Francisco"`),
want: nil,
tool: tool,
},
{
name: "unbalanced braces - extra closing",
@@ -1243,13 +1118,11 @@ func TestFindArguments(t *testing.T) {
want: map[string]any{
"format": "fahrenheit",
},
tool: tool,
},
{
name: "invalid JSON",
buffer: []byte(`{format: fahrenheit, location: "San Francisco"}`),
want: nil,
tool: tool,
},
{
name: "valid json",
@@ -1258,7 +1131,6 @@ func TestFindArguments(t *testing.T) {
"format": "fahrenheit",
"location": "San Francisco, CA",
},
tool: tool,
},
{
name: "valid arguments with special tokens",
@@ -1267,16 +1139,14 @@ func TestFindArguments(t *testing.T) {
"format": "fahrenheit",
"location": "San Francisco, CA",
},
tool: tool,
},
{
name: "valid arguments in array",
buffer: []byte(`[{"arguments": {"format": "fahrenheit", "location": "San Francisco, CA"}}`),
buffer: []byte(`[{"name": "get_temperature", "arguments": {"format": "fahrenheit", "location": "San Francisco, CA"}}`),
want: map[string]any{
"format": "fahrenheit",
"location": "San Francisco, CA",
},
tool: tool,
},
{
name: "nested deep",
@@ -1285,7 +1155,6 @@ func TestFindArguments(t *testing.T) {
"format": "fahrenheit",
"location": "San Francisco, CA",
},
tool: tool,
},
{
name: "one arg",
@@ -1293,7 +1162,6 @@ func TestFindArguments(t *testing.T) {
want: map[string]any{
"location": "San Francisco, CA",
},
tool: tool,
},
{
name: "two args",
@@ -1302,13 +1170,6 @@ func TestFindArguments(t *testing.T) {
"location": "San Francisco, CA",
"format": "fahrenheit",
},
tool: tool,
},
{
name: "no args",
buffer: []byte(`{"name": "say_hello"}`),
want: nil,
tool: tool2,
},
{
name: "deepseek",
@@ -1316,7 +1177,6 @@ func TestFindArguments(t *testing.T) {
want: map[string]any{
"location": "Tokyo",
},
tool: tool,
},
{
name: "deepseek",
@@ -1324,13 +1184,12 @@ func TestFindArguments(t *testing.T) {
want: map[string]any{
"location": "Tokyo",
},
tool: tool,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, _ := findArguments(tt.tool, tt.buffer)
got, _ := findArguments(tt.buffer)
if diff := cmp.Diff(got, tt.want); diff != "" {
t.Errorf("scanArguments() args mismatch (-got +want):\n%s", diff)