diff --git a/README.md b/README.md
index 2966ef5c..e3505a44 100644
--- a/README.md
+++ b/README.md
@@ -409,6 +409,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
+- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in Python. Uses Ollama to create personalities.
 
 ### Supported backends
 - [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
diff --git a/cmd/cmd.go b/cmd/cmd.go
index cae35f51..dff8d7c1 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -25,7 +25,7 @@ import (
 	"time"
 
 	"github.com/containerd/console"
-
+	"github.com/mattn/go-runewidth"
 	"github.com/olekukonko/tablewriter"
 	"github.com/spf13/cobra"
 	"golang.org/x/crypto/ssh"
@@ -744,7 +744,8 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
 	if wordWrap && termWidth >= 10 {
 		for _, ch := range content {
 			if state.lineLength+1 > termWidth-5 {
-				if len(state.wordBuffer) > termWidth-10 {
+
+				if runewidth.StringWidth(state.wordBuffer) > termWidth-10 {
 					fmt.Printf("%s%c", state.wordBuffer, ch)
 					state.wordBuffer = ""
 					state.lineLength = 0
@@ -752,12 +753,18 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
 				}
 
 				// backtrack the length of the last word and clear to the end of the line
-				fmt.Printf("\x1b[%dD\x1b[K\n", len(state.wordBuffer))
+				fmt.Printf("\x1b[%dD\x1b[K\n", runewidth.StringWidth(state.wordBuffer))
 				fmt.Printf("%s%c", state.wordBuffer, ch)
-				state.lineLength = len(state.wordBuffer) + 1
+				chWidth := runewidth.RuneWidth(ch)
+
+				state.lineLength = runewidth.StringWidth(state.wordBuffer) + chWidth
 			} else {
 				fmt.Print(string(ch))
-				state.lineLength += 1
+				state.lineLength += runewidth.RuneWidth(ch)
+				if runewidth.RuneWidth(ch) >= 2 {
+					state.wordBuffer = ""
+					continue
+				}
 
 				switch ch {
 				case ' ':
diff --git a/integration/max_queue_test.go b/integration/max_queue_test.go
index 43b15c6c..dfa5eae0 100644
--- a/integration/max_queue_test.go
+++ b/integration/max_queue_test.go
@@ -19,6 +19,11 @@ import (
 )
 
 func TestMaxQueue(t *testing.T) {
+	if os.Getenv("OLLAMA_TEST_EXISTING") != "" {
+		t.Skip("Max Queue test requires spawning a local server so we can adjust the queue size")
+		return
+	}
+
 	// Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU
 	// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
 	threadCount := 32
@@ -109,9 +114,9 @@ func TestMaxQueue(t *testing.T) {
 	slog.Info("generate done, waiting for embeds")
 	embedwg.Wait()
 
+	slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
 	require.Equal(t, resetByPeerCount, 0, "Connections reset by peer, have you updated your fd and socket limits?")
 	require.True(t, busyCount > 0, "no requests hit busy error but some should have")
 	require.True(t, canceledCount == 0, "no requests should have been canceled due to timeout")
-	slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
 
 }
diff --git a/llm/llama.cpp b/llm/llama.cpp
index 952d03db..614d3b91 160000
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
@@ -1 +1 @@
-Subproject commit 952d03dbead16e4dbdd1d3458486340673cc2465
+Subproject commit 614d3b914e1c3e02596f869649eb4f1d3b68614d
diff --git a/llm/patches/05-clip-fix.diff b/llm/patches/05-clip-fix.diff
deleted file mode 100644
index 3f68a5bb..00000000
--- a/llm/patches/05-clip-fix.diff
+++ /dev/null
@@ -1,24 +0,0 @@
-diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index e3c9bcd4..b43f892d 100644
---- a/examples/llava/clip.cpp
-+++ b/examples/llava/clip.cpp
-@@ -573,14 +573,16 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
-     struct ggml_tensor * embeddings = inp;
-     if (ctx->has_class_embedding) {
-         embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size);
-+    }
-+    ggml_set_name(embeddings, "embeddings");
-+    ggml_set_input(embeddings);
-+
-+    if (ctx->has_class_embedding) {
-         embeddings = ggml_acc(ctx0, embeddings, model.class_embedding,
-                 embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0);
-         embeddings = ggml_acc(ctx0, embeddings, inp,
-                 embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]);
-     }
--    ggml_set_name(embeddings, "embeddings");
--    ggml_set_input(embeddings);
--
-
-     struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_positions);
-     ggml_set_name(positions, "positions");
diff --git a/server/routes.go b/server/routes.go
index e991e774..5fbc2b54 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1086,7 +1086,7 @@ func Serve(ln net.Listener) error {
 		return err
 	}
 	<-ctx.Done()
-	return err
+	return nil
 }
 
 func waitForStream(c *gin.Context, ch chan interface{}) {
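
Note on the cmd/cmd.go change: the word-wrap logic previously measured the pending word with len(), which counts UTF-8 bytes rather than terminal columns, so the cursor-backtracking escape sequence (\x1b[%dD) and the lineLength accounting misfired on multi-byte and double-width (e.g. CJK) characters. A minimal standalone sketch of what go-runewidth measures, illustrative only and not part of this patch:

```go
package main

import (
	"fmt"

	"github.com/mattn/go-runewidth"
)

func main() {
	for _, s := range []string{"hello", "héllo", "你好"} {
		// len(s) counts UTF-8 bytes; runewidth.StringWidth(s) counts the
		// terminal columns the string occupies (CJK runes take 2 columns).
		fmt.Printf("%q bytes=%d columns=%d\n", s, len(s), runewidth.StringWidth(s))
	}

	// RuneWidth reports the column width of a single rune, which is what
	// the patched code now adds to state.lineLength per character.
	fmt.Println(runewidth.RuneWidth('a'))  // 1
	fmt.Println(runewidth.RuneWidth('好')) // 2
}
```

Since \x1b[%dD moves the cursor left by columns, backtracking by the byte length of state.wordBuffer over-shot for non-ASCII text; measuring with StringWidth/RuneWidth keeps both the cursor math and the wrap threshold in display columns.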