mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-23 23:18:26 +00:00
Merge branch 'ollama:main' into main
This commit is contained in:
@@ -409,6 +409,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
|
||||
- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
|
||||
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
|
||||
- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
|
||||
|
||||
### Supported backends
|
||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
|
||||
|
||||
17
cmd/cmd.go
17
cmd/cmd.go
@@ -25,7 +25,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/containerd/console"
|
||||
|
||||
"github.com/mattn/go-runewidth"
|
||||
"github.com/olekukonko/tablewriter"
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/crypto/ssh"
|
||||
@@ -744,7 +744,8 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
|
||||
if wordWrap && termWidth >= 10 {
|
||||
for _, ch := range content {
|
||||
if state.lineLength+1 > termWidth-5 {
|
||||
if len(state.wordBuffer) > termWidth-10 {
|
||||
|
||||
if runewidth.StringWidth(state.wordBuffer) > termWidth-10 {
|
||||
fmt.Printf("%s%c", state.wordBuffer, ch)
|
||||
state.wordBuffer = ""
|
||||
state.lineLength = 0
|
||||
@@ -752,12 +753,18 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
|
||||
}
|
||||
|
||||
// backtrack the length of the last word and clear to the end of the line
|
||||
fmt.Printf("\x1b[%dD\x1b[K\n", len(state.wordBuffer))
|
||||
fmt.Printf("\x1b[%dD\x1b[K\n", runewidth.StringWidth(state.wordBuffer))
|
||||
fmt.Printf("%s%c", state.wordBuffer, ch)
|
||||
state.lineLength = len(state.wordBuffer) + 1
|
||||
chWidth := runewidth.RuneWidth(ch)
|
||||
|
||||
state.lineLength = runewidth.StringWidth(state.wordBuffer) + chWidth
|
||||
} else {
|
||||
fmt.Print(string(ch))
|
||||
state.lineLength += 1
|
||||
state.lineLength += runewidth.RuneWidth(ch)
|
||||
if runewidth.RuneWidth(ch) >= 2 {
|
||||
state.wordBuffer = ""
|
||||
continue
|
||||
}
|
||||
|
||||
switch ch {
|
||||
case ' ':
|
||||
|
||||
@@ -19,6 +19,11 @@ import (
|
||||
)
|
||||
|
||||
func TestMaxQueue(t *testing.T) {
|
||||
if os.Getenv("OLLAMA_TEST_EXISTING") != "" {
|
||||
t.Skip("Max Queue test requires spawing a local server so we can adjust the queue size")
|
||||
return
|
||||
}
|
||||
|
||||
// Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU
|
||||
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
|
||||
threadCount := 32
|
||||
@@ -109,9 +114,9 @@ func TestMaxQueue(t *testing.T) {
|
||||
slog.Info("generate done, waiting for embeds")
|
||||
embedwg.Wait()
|
||||
|
||||
slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
|
||||
require.Equal(t, resetByPeerCount, 0, "Connections reset by peer, have you updated your fd and socket limits?")
|
||||
require.True(t, busyCount > 0, "no requests hit busy error but some should have")
|
||||
require.True(t, canceledCount == 0, "no requests should have been canceled due to timeout")
|
||||
|
||||
slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
|
||||
}
|
||||
|
||||
Submodule llm/llama.cpp updated: 952d03dbea...614d3b914e
@@ -1,24 +0,0 @@
|
||||
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
||||
index e3c9bcd4..b43f892d 100644
|
||||
--- a/examples/llava/clip.cpp
|
||||
+++ b/examples/llava/clip.cpp
|
||||
@@ -573,14 +573,16 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||
struct ggml_tensor * embeddings = inp;
|
||||
if (ctx->has_class_embedding) {
|
||||
embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size);
|
||||
+ }
|
||||
+ ggml_set_name(embeddings, "embeddings");
|
||||
+ ggml_set_input(embeddings);
|
||||
+
|
||||
+ if (ctx->has_class_embedding) {
|
||||
embeddings = ggml_acc(ctx0, embeddings, model.class_embedding,
|
||||
embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0);
|
||||
embeddings = ggml_acc(ctx0, embeddings, inp,
|
||||
embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]);
|
||||
}
|
||||
- ggml_set_name(embeddings, "embeddings");
|
||||
- ggml_set_input(embeddings);
|
||||
-
|
||||
|
||||
struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_positions);
|
||||
ggml_set_name(positions, "positions");
|
||||
@@ -1086,7 +1086,7 @@ func Serve(ln net.Listener) error {
|
||||
return err
|
||||
}
|
||||
<-ctx.Done()
|
||||
return err
|
||||
return nil
|
||||
}
|
||||
|
||||
func waitForStream(c *gin.Context, ch chan interface{}) {
|
||||
|
||||
Reference in New Issue
Block a user