Merge branch 'ollama:main' into main
@@ -624,6 +624,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [UnityCodeLama](https://github.com/HardCodeDev777/UnityCodeLama) (Unity Edtior tool to analyze scripts via Ollama)
 - [NativeMind](https://github.com/NativeMindBrowser/NativeMindExtension) (Private, on-device AI Assistant, no cloud dependencies)
 - [GMAI - Gradle Managed AI](https://gmai.premex.se/) (Gradle plugin for automated Ollama lifecycle management during build phases)
+- [NOMYO Router](https://github.com/nomyo-ai/nomyo-router) (A transparent Ollama proxy with model deployment aware routing which auto-manages multiple Ollama instances in a given network)

 ### Supported backends

@@ -1,7 +1,6 @@
 package harmony

 import (
-    "context"
     "fmt"
     "log/slog"
     "strings"
@@ -292,7 +291,7 @@ func (h *HarmonyMessageHandler) AddContent(content string, toolParser *HarmonyTo
     for _, event := range events {
         switch event := event.(type) {
         case HarmonyEventHeaderComplete:
-            slog.Log(context.TODO(), logutil.LevelTrace, "harmony event header complete", "header", event.Header)
+            logutil.Trace("harmony event header complete", "header", event.Header)
             switch event.Header.Channel {
             case "analysis":
                 if event.Header.Recipient != "" {
@@ -315,7 +314,7 @@ func (h *HarmonyMessageHandler) AddContent(content string, toolParser *HarmonyTo
                 h.state = harmonyMessageState_Normal
             }
         case HarmonyEventContentEmitted:
-            slog.Log(context.TODO(), logutil.LevelTrace, "harmony event content", "content", event.Content, "state", h.state)
+            logutil.Trace("harmony event content", "content", event.Content, "state", h.state)
             if h.state == harmonyMessageState_Normal {
                 contentSb.WriteString(event.Content)
             } else if h.state == harmonyMessageState_Thinking {
@@ -0,0 +1,28 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Daniel Hiltgen <daniel@ollama.com>
+Date: Fri, 29 Aug 2025 16:53:08 -0700
+Subject: [PATCH] harden uncaught exception registration
+
+---
+ ggml/src/ggml.cpp | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/ggml/src/ggml.cpp b/ggml/src/ggml.cpp
+index 0d388d45..f5bcb446 100644
+--- a/ggml/src/ggml.cpp
++++ b/ggml/src/ggml.cpp
+@@ -19,8 +19,12 @@ static bool ggml_uncaught_exception_init = []{
+         return false;
+     }
+     const auto prev{std::get_terminate()};
+-    GGML_ASSERT(prev != ggml_uncaught_exception);
+-    previous_terminate_handler = prev;
++    // GGML_ASSERT(prev != ggml_uncaught_exception);
++    if (prev != ggml_uncaught_exception) {
++        previous_terminate_handler = prev;
++    } else {
++        GGML_LOG_WARN("%s double registration of ggml_uncaught_exception\n", __func__);
++    }
+     std::set_terminate(ggml_uncaught_exception);
+     return true;
+ }();
@@ -678,8 +678,12 @@ func (s *ollamaServer) Load(ctx context.Context, gpus discover.GpuInfoList, requ

     if !(len(gpus) == 1 && gpus[0].Library == "cpu") {
         for _, gpu := range gpus {
+            available := gpu.FreeMemory - envconfig.GpuOverhead() - gpu.MinimumMemory
+            if gpu.FreeMemory < envconfig.GpuOverhead()+gpu.MinimumMemory {
+                available = 0
+            }
             slog.Info("gpu memory", "id", gpu.ID,
-                "available", format.HumanBytes2(gpu.FreeMemory-envconfig.GpuOverhead()-gpu.MinimumMemory),
+                "available", format.HumanBytes2(available),
                 "free", format.HumanBytes2(gpu.FreeMemory),
                 "minimum", format.HumanBytes2(gpu.MinimumMemory),
                 "overhead", format.HumanBytes2(envconfig.GpuOverhead()))
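The four inserted lines guard the logged "available" figure against unsigned underflow: the GPU memory fields are unsigned byte counts, so `gpu.FreeMemory - envconfig.GpuOverhead() - gpu.MinimumMemory` would wrap to an enormous value whenever free memory drops below the overhead plus the minimum reservation. A minimal sketch of the same clamp, with illustrative names and values rather than the actual ollama types:

```go
package main

import "fmt"

// availableMemory mirrors the clamp added above: report zero instead of
// letting unsigned subtraction wrap when free memory is smaller than the
// overhead plus the minimum reservation. Names here are illustrative.
func availableMemory(free, overhead, minimum uint64) uint64 {
    if free < overhead+minimum {
        return 0
    }
    return free - overhead - minimum
}

func main() {
    fmt.Println(availableMemory(8<<30, 512<<20, 457<<20))   // plenty of headroom
    fmt.Println(availableMemory(256<<20, 512<<20, 457<<20)) // would underflow; clamped to 0
}
```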
@@ -861,7 +865,7 @@ func (s *ollamaServer) createLayout(systemInfo discover.SystemInfo, systemGPUs d
         }
         layers[i] += memory.CPU.Weights[i].Size
         layers[i] += memory.CPU.Cache[i].Size
-        slog.Log(context.TODO(), logutil.LevelTrace, "layer to assign", "layer", i, "size", format.HumanBytes2(layers[i]))
+        logutil.Trace("layer to assign", "layer", i, "size", format.HumanBytes2(layers[i]))
     }

     gpuLayers := ml.GPULayersList{}
@@ -1,6 +1,7 @@
 package logutil

 import (
+    "context"
     "io"
     "log/slog"
     "path/filepath"
@@ -27,3 +28,11 @@ func NewLogger(w io.Writer, level slog.Level) *slog.Logger {
         },
     }))
 }
+
+func Trace(msg string, args ...any) {
+    slog.Log(context.TODO(), LevelTrace, msg, args...)
+}
+
+func TraceContext(ctx context.Context, msg string, args ...any) {
+    slog.Log(ctx, LevelTrace, msg, args...)
+}
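These two helpers are what the rest of the commit migrates to: call sites drop the repeated `slog.Log(context.TODO(), logutil.LevelTrace, ...)` boilerplate in favour of `logutil.Trace`, and `TraceContext` covers callers that already hold a context. A small usage sketch, assuming the package import path `github.com/ollama/ollama/logutil` (the log message and attribute below are illustrative):

```go
package main

import (
    "context"
    "log/slog"
    "os"

    "github.com/ollama/ollama/logutil" // assumed import path for the package above
)

func main() {
    // Route trace-level records to stderr; NewLogger and LevelTrace are the
    // existing logutil pieces referenced in the hunk above.
    slog.SetDefault(logutil.NewLogger(os.Stderr, logutil.LevelTrace))

    // Before this commit, each trace line spelled out the boilerplate:
    slog.Log(context.TODO(), logutil.LevelTrace, "created tensor", "name", "blk.0.attn_q.weight")

    // After, the helper hides the context.TODO() and the custom level:
    logutil.Trace("created tensor", "name", "blk.0.attn_q.weight")

    // Callers that already carry a context can pass it through:
    logutil.TraceContext(context.Background(), "created tensor", "name", "blk.0.attn_q.weight")
}
```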
@@ -266,7 +266,7 @@ func (m DeviceMemory) LogValue() slog.Value {
 // allocation is guaranteed to be provided so that if it failed, the caller can
 // accommodate that to make forward progress.
 type BackendMemory struct {
-    // InputsWeights are always located on the CPU and cannot be moved
+    // InputWeights are always located on the CPU and cannot be moved
     InputWeights Memory

     // CPU model components are located in system memory. This does not
@@ -271,7 +271,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
             tt := C.ggml_new_tensor(ctxs[bt], kind, C.int(len(t.source.Shape)), (*C.int64_t)(unsafe.Pointer(&t.source.Shape[0])))
             C.ggml_set_name(tt, cname)

-            slog.Log(context.TODO(), logutil.LevelTrace, "created tensor", "name", name, "shape", t.source.Shape, "dtype", t.source.Kind, "buffer_type", C.GoString(C.ggml_backend_buft_name(bt)))
+            logutil.Trace("created tensor", "name", name, "shape", t.source.Shape, "dtype", t.source.Kind, "buffer_type", C.GoString(C.ggml_backend_buft_name(bt)))

             size := pad(C.ggml_backend_buft_get_alloc_size(bt, tt), C.ggml_backend_buft_get_alignment(bt))
             if layer == -1 {
@@ -378,7 +378,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
     }

     for bs := range maps.Values(bbs) {
-        slog.Log(context.TODO(), logutil.LevelTrace, "model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)),
+        logutil.Trace("model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)),
             "size", format.HumanBytes2(uint64(C.ggml_backend_buffer_get_size(bs))))
     }

@@ -811,7 +811,7 @@ func (c *Context) Reserve() {
            }
        }

        slog.Log(context.TODO(), logutil.LevelTrace, "compute graph", "backend", C.GoString(C.ggml_backend_name(c.b.schedBackends[i])),
+        logutil.Trace("compute graph", "backend", C.GoString(C.ggml_backend_name(c.b.schedBackends[i])),
            "buffer_type", C.GoString(C.ggml_backend_buft_name(c.b.schedBufts[i])), "size", format.HumanBytes2(uint64(bufferStatus.size)))
    }

ml/backend/ggml/ggml/src/ggml.cpp (vendored)
@@ -19,8 +19,12 @@ static bool ggml_uncaught_exception_init = []{
         return false;
     }
     const auto prev{std::get_terminate()};
-    GGML_ASSERT(prev != ggml_uncaught_exception);
-    previous_terminate_handler = prev;
+    // GGML_ASSERT(prev != ggml_uncaught_exception);
+    if (prev != ggml_uncaught_exception) {
+        previous_terminate_handler = prev;
+    } else {
+        GGML_LOG_WARN("%s double registration of ggml_uncaught_exception\n", __func__);
+    }
     std::set_terminate(ggml_uncaught_exception);
     return true;
 }();
@@ -2,7 +2,6 @@ package model

 import (
     "cmp"
-    "context"
     "fmt"
     "iter"
     "log/slog"
@@ -202,7 +201,7 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
         }
     }

-    slog.Log(context.TODO(), logutil.LevelTrace, "encoded", "string", s, "ids", ids)
+    logutil.Trace("encoded", "string", s, "ids", ids)

     if addSpecial && len(ids) > 0 {
         ids = bpe.vocab.addSpecials(ids)
@@ -243,6 +242,6 @@ func (bpe BytePairEncoding) Decode(ids []int32) (string, error) {
         }
     }

-    slog.Log(context.TODO(), logutil.LevelTrace, "decoded", "string", sb.String(), "from", lazyIdsString{ids: ids})
+    logutil.Trace("decoded", "string", sb.String(), "from", lazyIdsString{ids: ids})
     return sb.String(), nil
 }
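The Decode hunk above logs the source ids through a `lazyIdsString` value instead of formatting them inline, so the (possibly long) id list is only rendered if a handler actually emits the trace record. A minimal sketch of that deferred-formatting pattern via `slog.LogValuer`; the type below is illustrative, not the model package's implementation:

```go
package main

import (
    "fmt"
    "log/slog"
    "os"
    "strings"
)

// lazyIDs defers formatting an id slice until a handler processes the
// record, so disabled trace logging stays cheap. Sketch only; not the
// model package's lazyIdsString.
type lazyIDs struct{ ids []int32 }

func (l lazyIDs) LogValue() slog.Value {
    parts := make([]string, len(l.ids))
    for i, id := range l.ids {
        parts[i] = fmt.Sprint(id)
    }
    return slog.StringValue("[" + strings.Join(parts, " ") + "]")
}

func main() {
    logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
    logger.Debug("decoded", "string", "hello world", "from", lazyIDs{ids: []int32{15339, 1917}})
}
```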
@@ -1,12 +1,10 @@
 package model

 import (
-    "context"
     "errors"
     "fmt"
     _ "image/jpeg"
     _ "image/png"
-    "log/slog"
     "os"
     "reflect"
     "strconv"
@@ -198,7 +196,7 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
             names := fn(tagsCopy)
             for _, name := range names {
                 if tensor := base.Backend().Get(strings.Join(name, ".")); tensor != nil {
-                    slog.Log(context.TODO(), logutil.LevelTrace, "found tensor", "", tensor)
+                    logutil.Trace("found tensor", "", tensor)
                     vv.Set(reflect.ValueOf(tensor))
                     break
                 }
@@ -2,7 +2,6 @@ package model

 import (
     "container/heap"
-    "context"
     "fmt"
     "log/slog"
     "strconv"
@@ -25,7 +24,7 @@ func (spm SentencePieceModel) Vocabulary() *Vocabulary {
 }

 func NewSentencePieceModel(vocab *Vocabulary) SentencePieceModel {
-    slog.Log(context.TODO(), logutil.LevelTrace, "Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])
+    logutil.Trace("Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])

     counter := map[int]int{}
     var maxTokenLen int
@@ -39,7 +38,7 @@ func NewSentencePieceModel(vocab *Vocabulary) SentencePieceModel {
         }
     }

-    slog.Log(context.TODO(), logutil.LevelTrace, "Token counts", "normal", counter[TOKEN_TYPE_NORMAL], "unknown", counter[TOKEN_TYPE_UNKNOWN], "control", counter[TOKEN_TYPE_CONTROL],
+    logutil.Trace("Token counts", "normal", counter[TOKEN_TYPE_NORMAL], "unknown", counter[TOKEN_TYPE_UNKNOWN], "control", counter[TOKEN_TYPE_CONTROL],
         "user defined", counter[TOKEN_TYPE_USER_DEFINED], "unused", counter[TOKEN_TYPE_UNUSED], "byte", counter[TOKEN_TYPE_BYTE],
         "max token len", maxTokenLen)

@@ -182,7 +181,7 @@ func (spm SentencePieceModel) Encode(s string, addSpecial bool) ([]int32, error)
         }
     }

-    slog.Log(context.TODO(), logutil.LevelTrace, "encoded", "string", s, "ids", ids)
+    logutil.Trace("encoded", "string", s, "ids", ids)

     if addSpecial && len(ids) > 0 {
         ids = spm.vocab.addSpecials(ids)
@@ -246,6 +245,6 @@ func (spm SentencePieceModel) Decode(ids []int32) (string, error) {
         }
     }

-    slog.Log(context.TODO(), logutil.LevelTrace, "decoded", "ids", ids, "string", sb.String())
+    logutil.Trace("decoded", "ids", ids, "string", sb.String())
     return sb.String(), nil
 }
@@ -429,12 +429,12 @@ func (s *Server) forwardBatch(pendingBatch batchState) (nextBatch batchState, er
     // before setting up the next batch so the seqs inputs are ready to receive their
     // token values and we get the correct input pointers for the batchInputs
     if pendingBatch.ctx != nil {
-        slog.Log(context.TODO(), logutil.LevelTrace, "forwardBatch waiting for compute to start", "pendingBatch.id", pendingBatch.id)
+        logutil.Trace("forwardBatch waiting for compute to start", "pendingBatch.id", pendingBatch.id)
         <-pendingBatch.computeStartedCh
-        slog.Log(context.TODO(), logutil.LevelTrace, "forwardBatch compute started, setting up next batch", "pendingBatch.id", pendingBatch.id, "id", s.batchID)
+        logutil.Trace("forwardBatch compute started, setting up next batch", "pendingBatch.id", pendingBatch.id, "id", s.batchID)
         nextBatch.inputsReadyCh = pendingBatch.outputsReadyCh // Chain the ouputs from the pending batch to the next inputs batch
     } else {
-        slog.Log(context.TODO(), logutil.LevelTrace, "forwardBatch no pending batch detected", "batchID", s.batchID)
+        logutil.Trace("forwardBatch no pending batch detected", "batchID", s.batchID)
         // No pendingBatch, so the inputs will be ready in the seqs immediately
         nextBatch.inputsReadyCh = make(chan struct{}, 1)
         nextBatch.inputsReadyCh <- struct{}{}
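Around the logging change, this hunk keeps the existing pipelining scheme visible: when a batch is already in flight, the next batch's `inputsReadyCh` is simply the pending batch's `outputsReadyCh`, so preparing batch N+1 overlaps with computing batch N while still completing in order. A stripped-down sketch of that channel chaining, with illustrative names only:

```go
package main

import (
    "fmt"
    "time"
)

// batchState carries only the two channels relevant to the chaining shown
// above; everything else about the real runner is omitted.
type batchState struct {
    id             int
    inputsReadyCh  chan struct{}
    outputsReadyCh chan struct{}
}

func compute(b batchState) {
    <-b.inputsReadyCh // wait for the previous batch to publish its outputs
    fmt.Println("computing batch", b.id)
    time.Sleep(10 * time.Millisecond)
    b.outputsReadyCh <- struct{}{} // unblock whichever batch was chained after us
}

func main() {
    first := batchState{id: 0, inputsReadyCh: make(chan struct{}, 1), outputsReadyCh: make(chan struct{}, 1)}
    first.inputsReadyCh <- struct{}{} // no pending batch: inputs are ready immediately

    // Chain: the second batch's inputs become ready when the first batch's
    // outputs are ready, mirroring nextBatch.inputsReadyCh = pendingBatch.outputsReadyCh.
    second := batchState{id: 1, inputsReadyCh: first.outputsReadyCh, outputsReadyCh: make(chan struct{}, 1)}

    go compute(first)
    compute(second)
}
```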
@@ -546,7 +546,7 @@ func (s *Server) forwardBatch(pendingBatch batchState) (nextBatch batchState, er
             if i+1 == len(seq.inputs) {
                 batch.Outputs = append(batch.Outputs, int32(len(batchInputs)-1))
             }
-            slog.Log(context.TODO(), logutil.LevelTrace, "forwardBatch iBatch", "batchID", s.batchID, "seqIdx", seqIdx, "seq.iBatch", seq.iBatch, "i+1", i+1, "len(seq.inputs)", len(seq.inputs))
+            logutil.Trace("forwardBatch iBatch", "batchID", s.batchID, "seqIdx", seqIdx, "seq.iBatch", seq.iBatch, "i+1", i+1, "len(seq.inputs)", len(seq.inputs))
             seq.pendingInputs = append(seq.pendingInputs, inp)
         }

@@ -560,7 +560,7 @@ func (s *Server) forwardBatch(pendingBatch batchState) (nextBatch batchState, er
     }

     if len(batchInputs) == 0 {
-        slog.Log(context.TODO(), logutil.LevelTrace, "forwardBatch no batchInputs, going idle", "batchID", s.batchID)
+        logutil.Trace("forwardBatch no batchInputs, going idle", "batchID", s.batchID)
         nextBatch.ctx.Close()
         nextBatch.ctx = nil
         return
@@ -589,14 +589,14 @@ func (s *Server) computeBatch(activeBatch batchState) {
     defer activeBatch.ctx.Close()

     // Wait until inputs are ready
-    slog.Log(context.TODO(), logutil.LevelTrace, "computeBatch: waiting for inputs to be ready", "batchID", activeBatch.id)
+    logutil.Trace("computeBatch: waiting for inputs to be ready", "batchID", activeBatch.id)
     <-activeBatch.inputsReadyCh
-    slog.Log(context.TODO(), logutil.LevelTrace, "computeBatch: inputs are ready", "batchID", activeBatch.id)
+    logutil.Trace("computeBatch: inputs are ready", "batchID", activeBatch.id)

     // Once we complete, signal the next batch of inputs are ready
     // This will unblock the next computeBatch, or forwardBatch if new seqs come in
     defer func() {
-        slog.Log(context.TODO(), logutil.LevelTrace, "computeBatch: outputs are ready", "batchID", activeBatch.id)
+        logutil.Trace("computeBatch: outputs are ready", "batchID", activeBatch.id)
         activeBatch.outputsReadyCh <- struct{}{}
     }()

@@ -626,7 +626,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
         // Detect if the sequence we're processing has already been completed and replaced
         // with a new sequence
         if seq != activeBatch.seqs[i] {
-            slog.Log(context.TODO(), logutil.LevelTrace, "computeBatch: sequence replaced, discarding its results", "batchID", activeBatch.id, "seqIdx", i)
+            logutil.Trace("computeBatch: sequence replaced, discarding its results", "batchID", activeBatch.id, "seqIdx", i)
             continue
         }

@@ -666,18 +666,18 @@ func (s *Server) computeBatch(activeBatch batchState) {
     activeBatch.batch.Inputs.SetValueFromIntSlice(batchInputs)
     activeBatch.ctx.ComputeWithNotify(
         func() {
-            slog.Log(context.TODO(), logutil.LevelTrace, "computeBatch: signaling computeStartedCh", "batchID", activeBatch.id)
+            logutil.Trace("computeBatch: signaling computeStartedCh", "batchID", activeBatch.id)
             activeBatch.computeStartedCh <- struct{}{}
         },
         activeBatch.modelOutput)
     logits := activeBatch.modelOutput.Floats()

-    slog.Log(context.TODO(), logutil.LevelTrace, "computeBatch: logits ready", "batchID", activeBatch.id)
+    logutil.Trace("computeBatch: logits ready", "batchID", activeBatch.id)

     s.mu.Lock()
     defer s.mu.Unlock()

-    slog.Log(context.TODO(), logutil.LevelTrace, "computeBatch: decoding", "batchID", activeBatch.id)
+    logutil.Trace("computeBatch: decoding", "batchID", activeBatch.id)
     for i, seq := range s.seqs {
         if seq == nil || nextBatchTokens[i] == nil {
             continue
@@ -697,7 +697,7 @@ func (s *Server) computeBatch(activeBatch batchState) {

             // sample a token
             vocabSize := len(logits) / len(activeBatch.batch.Outputs)
-            slog.Log(context.TODO(), logutil.LevelTrace, "computeBatch: vocab details", "batchID", activeBatch.id, "seqIdx", i, "len(logits)", len(logits), "len(activeBatch.batch.Outputs)", len(activeBatch.batch.Outputs), "vocabSize", vocabSize, "iBatches", iBatches)
+            logutil.Trace("computeBatch: vocab details", "batchID", activeBatch.id, "seqIdx", i, "len(logits)", len(logits), "len(activeBatch.batch.Outputs)", len(activeBatch.batch.Outputs), "vocabSize", vocabSize, "iBatches", iBatches)
             token, err := seq.sampler.Sample(logits[iBatches[i]*vocabSize : (iBatches[i]+1)*vocabSize])
             if err != nil {
                 s.hardErrCh <- fmt.Errorf("failed to sample token: %w", err)
@@ -711,7 +711,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
             // TODO (jmorganca): we should send this back
             // as it's important for the /api/generate context
             // seq.responses <- piece
-            slog.Log(context.TODO(), logutil.LevelTrace, "computeBatch: EOS", "batchID", activeBatch.id, "seqIdx", i)
+            logutil.Trace("computeBatch: EOS", "batchID", activeBatch.id, "seqIdx", i)
             s.removeSequence(i, llm.DoneReasonStop)
             continue
         }