diff --git a/README.md b/README.md
index 313a996e..3ec519d8 100644
--- a/README.md
+++ b/README.md
@@ -316,7 +316,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
 - [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
 - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
-- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Chat with Code Repository)
+- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Interactive chat tool that can leverage Ollama models for rapid understanding and navigation of GitHub code repositories)
 - [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
 - [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
 - [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source Retrieval-Augmented Generation engine based on deep document understanding)
@@ -326,6 +326,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG)
 - [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
 - [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
+- [RWKV-Runner](https://github.com/josStorer/RWKV-Runner) (RWKV offline LLM deployment tool, also usable as a client for ChatGPT and Ollama)
 - [Ollama Grid Search](https://github.com/dezoito/ollama-grid-search) (app to evaluate and compare models)
 - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
 - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
@@ -368,7 +369,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard and said in the meetings)
 - [Hexabot](https://github.com/hexastack/hexabot) (A conversational AI builder)
 - [Reddit Rate](https://github.com/rapidarchitect/reddit_analyzer) (Search and Rate Reddit topics with a weighted summation)
-- [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt)
+- [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt) (Chrome Extension to manage open-source models supported by Ollama, create custom models, and chat with models from a user-friendly UI)
 - [VT](https://github.com/vinhnx/vt.ai) (A minimal multimodal AI chat app, with dynamic conversation routing. Supports local models via Ollama)
 - [Nosia](https://github.com/nosia-ai/nosia) (Easy to install and use RAG platform based on Ollama)
 - [Witsy](https://github.com/nbonamy/witsy) (An AI Desktop application avaiable for Mac/Windows/Linux)
@@ -394,7 +395,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Oatmeal](https://github.com/dustinblackman/oatmeal)
 - [cmdh](https://github.com/pgibler/cmdh)
 - [ooo](https://github.com/npahlfer/ooo)
-- [shell-pilot](https://github.com/reid41/shell-pilot)
+- [shell-pilot](https://github.com/reid41/shell-pilot) (Interact with models via pure shell scripts on Linux or macOS)
 - [tenere](https://github.com/pythops/tenere)
 - [llm-ollama](https://github.com/taketwo/llm-ollama) for [Datasette's LLM CLI](https://llm.datasette.io/en/stable/).
 - [typechat-cli](https://github.com/anaisbetts/typechat-cli)
@@ -410,6 +411,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [bb7](https://github.com/drunkwcodes/bb7)
 - [SwollamaCLI](https://github.com/marcusziade/Swollama) bundled with the Swollama Swift package. [Demo](https://github.com/marcusziade/Swollama?tab=readme-ov-file#cli-usage)
 - [aichat](https://github.com/sigoden/aichat) All-in-one LLM CLI tool featuring Shell Assistant, Chat-REPL, RAG, AI tools & agents, with access to OpenAI, Claude, Gemini, Ollama, Groq, and more.
+- [PowershAI](https://github.com/rrg92/powershai) PowerShell module that brings AI to terminal on Windows, including support for Ollama
 - [orbiton](https://github.com/xyproto/orbiton) Configuration-free text editor and IDE with support for tab completion with Ollama.

 ### Apple Vision Pro
@@ -514,6 +516,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
+- [ChatGPTBox: All in one browser extension](https://github.com/josStorer/chatGPTBox) with [Integrating Tutorial](https://github.com/josStorer/chatGPTBox/issues/616#issuecomment-1975186467)
 - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
 - [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
 - [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front end Open WebUI service.)
@@ -529,3 +532,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Supported backends

 - [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
+
+### Observability
+
+- [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
diff --git a/cmd/cmd.go b/cmd/cmd.go
index 91819c8e..fad06ffd 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -19,7 +19,6 @@ import (
     "os"
     "os/signal"
     "path/filepath"
-    "regexp"
     "runtime"
     "strconv"
     "strings"
@@ -35,14 +34,11 @@ import (
     "golang.org/x/term"

     "github.com/ollama/ollama/api"
-    "github.com/ollama/ollama/auth"
     "github.com/ollama/ollama/envconfig"
     "github.com/ollama/ollama/format"
     "github.com/ollama/ollama/parser"
     "github.com/ollama/ollama/progress"
     "github.com/ollama/ollama/server"
-    "github.com/ollama/ollama/types/errtypes"
-    "github.com/ollama/ollama/types/model"
     "github.com/ollama/ollama/version"
 )

@@ -456,6 +452,10 @@ func RunHandler(cmd *cobra.Command, args []string) error {
     if len(prompts) > 0 {
         interactive = false
     }
+    // Be quiet if we're redirecting to a pipe or file
+    if !term.IsTerminal(int(os.Stdout.Fd())) {
+        interactive = false
+    }

     nowrap, err := cmd.Flags().GetBool("nowordwrap")
     if err != nil {
@@ -512,47 +512,6 @@ func RunHandler(cmd *cobra.Command, args []string) error {
     return generate(cmd, opts)
 }

-func errFromUnknownKey(unknownKeyErr error) error {
-    // find SSH public key in the error message
-    sshKeyPattern := `ssh-\w+ [^\s"]+`
-    re := regexp.MustCompile(sshKeyPattern)
-    matches := re.FindStringSubmatch(unknownKeyErr.Error())
-
-    if len(matches) > 0 {
-        serverPubKey := matches[0]
-
-        localPubKey, err := auth.GetPublicKey()
-        if err != nil {
-            return unknownKeyErr
-        }
-
-        if runtime.GOOS == "linux" && serverPubKey != localPubKey {
-            // try the ollama service public key
-            svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
-            if err != nil {
-                return unknownKeyErr
-            }
-            localPubKey = strings.TrimSpace(string(svcPubKey))
-        }
-
-        // check if the returned public key matches the local public key, this prevents adding a remote key to the user's account
-        if serverPubKey != localPubKey {
-            return unknownKeyErr
-        }
-
-        var msg strings.Builder
-        msg.WriteString(unknownKeyErr.Error())
-        msg.WriteString("\n\nYour ollama key is:\n")
-        msg.WriteString(localPubKey)
-        msg.WriteString("\nAdd your key at:\n")
-        msg.WriteString("https://ollama.com/settings/keys")
-
-        return errors.New(msg.String())
-    }
-
-    return unknownKeyErr
-}
-
 func PushHandler(cmd *cobra.Command, args []string) error {
     client, err := api.ClientFromEnvironment()
     if err != nil {
@@ -606,14 +565,6 @@ func PushHandler(cmd *cobra.Command, args []string) error {
         if strings.Contains(err.Error(), "access denied") {
             return errors.New("you are not authorized to push to this namespace, create the model under a namespace you own")
         }
-        host := model.ParseName(args[0]).Host
-        isOllamaHost := strings.HasSuffix(host, ".ollama.ai") || strings.HasSuffix(host, ".ollama.com")
-        if strings.Contains(err.Error(), errtypes.UnknownOllamaKeyErrMsg) && isOllamaHost {
-            // the user has not added their ollama key to ollama.com
-            // re-throw an error with a more user-friendly message
-            return errFromUnknownKey(err)
-        }
-
         return err
     }

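The new block in RunHandler switches off interactive mode whenever stdout is not a terminal. A minimal, self-contained sketch of that check, using the same golang.org/x/term package cmd.go already imports (the surrounding program here is illustrative, not Ollama's CLI):

```go
package main

import (
	"fmt"
	"os"

	"golang.org/x/term"
)

func main() {
	// os.Stdout.Fd() refers to a real TTY only when output goes straight to the
	// console; when it is redirected to a file or pipe, IsTerminal reports false.
	if term.IsTerminal(int(os.Stdout.Fd())) {
		fmt.Println("stdout is a terminal: prompts, spinners and word wrap are fine")
	} else {
		fmt.Println("stdout is redirected: print only the model output")
	}
}
```

Run it twice, once directly and once as `go run main.go > out.txt`, to see both branches; that is the behaviour difference the RunHandler change introduces when `ollama run` output is piped or redirected.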
diff --git a/examples/langchain-python-simple/main.py b/examples/langchain-python-simple/main.py
index 8d6989c8..dafff827 100644
--- a/examples/langchain-python-simple/main.py
+++ b/examples/langchain-python-simple/main.py
@@ -1,6 +1,6 @@
 from langchain.llms import Ollama

-input = input("What is your question?")
+input = input("What is your question?\n> ")
 llm = Ollama(model="llama3.2")
-res = llm.predict(input)
+res = llm.invoke(input)
 print (res)
diff --git a/go.mod b/go.mod
index 7eb6a535..496d8d3a 100644
--- a/go.mod
+++ b/go.mod
@@ -29,7 +29,7 @@ require (
     github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
     github.com/bytedance/sonic/loader v0.1.1 // indirect
     github.com/chewxy/hm v1.0.0 // indirect
-    github.com/chewxy/math32 v1.10.1 // indirect
+    github.com/chewxy/math32 v1.11.0 // indirect
     github.com/cloudwego/base64x v0.1.4 // indirect
     github.com/cloudwego/iasm v0.2.0 // indirect
     github.com/davecgh/go-spew v1.1.1 // indirect
diff --git a/go.sum b/go.sum
index 75b0c054..b3093ceb 100644
--- a/go.sum
+++ b/go.sum
@@ -21,8 +21,8 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA
 github.com/chewxy/hm v1.0.0 h1:zy/TSv3LV2nD3dwUEQL2VhXeoXbb9QkpmdRAVUFiA6k=
 github.com/chewxy/hm v1.0.0/go.mod h1:qg9YI4q6Fkj/whwHR1D+bOGeF7SniIP40VweVepLjg0=
 github.com/chewxy/math32 v1.0.0/go.mod h1:Miac6hA1ohdDUTagnvJy/q+aNnEk16qWUdb8ZVhvCN0=
-github.com/chewxy/math32 v1.10.1 h1:LFpeY0SLJXeaiej/eIp2L40VYfscTvKh/FSEZ68uMkU=
-github.com/chewxy/math32 v1.10.1/go.mod h1:dOB2rcuFrCn6UHrze36WSLVPKtzPMRAQvBvUwkSsLqs=
+github.com/chewxy/math32 v1.11.0 h1:8sek2JWqeaKkVnHa7bPVqCEOUPbARo4SGxs6toKyAOo=
+github.com/chewxy/math32 v1.11.0/go.mod h1:dOB2rcuFrCn6UHrze36WSLVPKtzPMRAQvBvUwkSsLqs=
 github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
 github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
 github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
@@ -231,8 +231,6 @@ golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+o
 golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
 golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
 golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
-golang.org/x/image v0.14.0 h1:tNgSxAFe3jC4uYqvZdTr84SZoM1KfwdC9SKIFrLjFn4=
-golang.org/x/image v0.14.0/go.mod h1:HUYqC05R2ZcZ3ejNQsIHQDQiwWM4JBqmm6MKANTp4LE=
 golang.org/x/image v0.22.0 h1:UtK5yLUzilVrkjMAZAZ34DXGpASN8i8pj8g+O+yd10g=
 golang.org/x/image v0.22.0/go.mod h1:9hPFhljd4zZ1GNSIZJ49sqbp45GKK9t6w+iXvGqZUz4=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@@ -268,8 +266,6 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
-golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
 golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
 golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -296,8 +292,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
-golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
 golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
 golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
diff --git a/integration/max_queue_test.go b/integration/max_queue_test.go
index ec9e085a..a2766430 100644
--- a/integration/max_queue_test.go
+++ b/integration/max_queue_test.go
@@ -16,7 +16,6 @@ import (
     "github.com/stretchr/testify/require"

     "github.com/ollama/ollama/api"
-    "github.com/ollama/ollama/envconfig"
 )

 func TestMaxQueue(t *testing.T) {
@@ -27,12 +26,8 @@ func TestMaxQueue(t *testing.T) {

     // Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU
     // Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
-    threadCount := 32
-    if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
-        threadCount = int(maxQueue)
-    } else {
-        t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
-    }
+    threadCount := 16
+    t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))

     req := api.GenerateRequest{
         Model: "orca-mini",
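The max_queue_test.go change stops reading OLLAMA_MAX_QUEUE from the environment and instead always pins it for the spawned server, so the test exercises a known queue size. A hedged sketch of that pattern in isolation (the variable name comes from the test; how the server consumes it is assumed, not shown):

```go
package integration

import (
	"strconv"
	"testing"
)

func TestQueuePinned(t *testing.T) {
	// Keep the value small: in CPU-only CI each queued request is slow,
	// and macOS struggles past roughly 128 concurrent connections anyway.
	threadCount := 16

	// t.Setenv sets the variable for this test only and restores the previous
	// value when the test finishes, so nothing leaks into other tests.
	t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))

	// ... spawn the server and fire threadCount concurrent generate requests here ...
}
```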
diff --git a/llama/runner/runner.go b/llama/runner/runner.go
index c7662b33..db8092f3 100644
--- a/llama/runner/runner.go
+++ b/llama/runner/runner.go
@@ -300,6 +300,7 @@ func (s *Server) removeSequence(seqIndex int, reason string) {
     close(seq.embedding)
     seq.cache.InUse = false
     s.seqs[seqIndex] = nil
+    s.seqsSem.Release(1)
 }

 func (s *Server) run(ctx context.Context) {
@@ -649,14 +650,18 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
         return
     }

-    // Ensure that a place to put the sequence is available
+    // Ensure there is a place to put the sequence, released when removed from s.seqs
     if err := s.seqsSem.Acquire(r.Context(), 1); err != nil {
-        slog.Error("Failed to acquire semaphore", "error", err)
+        if errors.Is(err, context.Canceled) {
+            slog.Info("aborting completion request due to client closing the connection")
+        } else {
+            slog.Error("Failed to acquire semaphore", "error", err)
+        }
         return
     }
-    defer s.seqsSem.Release(1)

     s.mu.Lock()
+    found := false
     for i, sq := range s.seqs {
         if sq == nil {
             seq.cache, seq.inputs, err = s.cache.LoadCacheSlot(seq.inputs, req.CachePrompt)
@@ -670,11 +675,17 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {

             s.seqs[i] = seq
             s.cond.Signal()
+            found = true
             break
         }
     }
     s.mu.Unlock()

+    if !found {
+        http.Error(w, "could not find an available sequence", http.StatusInternalServerError)
+        return
+    }
+
     for {
         select {
         case <-r.Context().Done():
@@ -738,14 +749,18 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) {
         return
     }

-    // Ensure that a place to put the sequence is available
+    // Ensure there is a place to put the sequence, released when removed from s.seqs
     if err := s.seqsSem.Acquire(r.Context(), 1); err != nil {
-        slog.Error("Failed to acquire semaphore", "error", err)
+        if errors.Is(err, context.Canceled) {
+            slog.Info("aborting embeddings request due to client closing the connection")
+        } else {
+            slog.Error("Failed to acquire semaphore", "error", err)
+        }
         return
     }
-    defer s.seqsSem.Release(1)

     s.mu.Lock()
+    found := false
     for i, sq := range s.seqs {
         if sq == nil {
             seq.cache, seq.inputs, err = s.cache.LoadCacheSlot(seq.inputs, req.CachePrompt)
@@ -756,11 +771,17 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) {
             }
             s.seqs[i] = seq
             s.cond.Signal()
+            found = true
             break
         }
     }
     s.mu.Unlock()

+    if !found {
+        http.Error(w, "could not find an available sequence", http.StatusInternalServerError)
+        return
+    }
+
     embedding := <-seq.embedding

     if err := json.NewEncoder(w).Encode(&EmbeddingResponse{
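Behind these runner.go hunks is one ownership change: a slot permit used to be released when the HTTP handler returned (`defer s.seqsSem.Release(1)`), and is now held until `removeSequence` takes the sequence out of `s.seqs`. Below is a stripped-down sketch of that pattern with golang.org/x/sync/semaphore (the `scheduler` type and its methods are illustrative stand-ins, not the runner's real code):

```go
package main

import (
	"context"
	"errors"
	"fmt"

	"golang.org/x/sync/semaphore"
)

// scheduler is a toy stand-in for the runner: one semaphore permit per slot in seqs.
type scheduler struct {
	slots *semaphore.Weighted
	seqs  []*int
}

// add blocks until a slot permit is free and then claims an entry in seqs.
// The permit is NOT released on return; it now belongs to the sequence.
func (s *scheduler) add(ctx context.Context, seq *int) error {
	if err := s.slots.Acquire(ctx, 1); err != nil {
		if errors.Is(err, context.Canceled) {
			// The caller gave up while waiting; report quietly rather than as a server error.
			return err
		}
		return fmt.Errorf("acquire: %w", err)
	}
	for i := range s.seqs {
		if s.seqs[i] == nil {
			s.seqs[i] = seq
			return nil
		}
	}
	// Defensive: no free entry despite holding a permit, so give the permit back.
	s.slots.Release(1)
	return errors.New("could not find an available sequence")
}

// removeSequence mirrors where runner.go now calls Release: the entry and the
// permit are freed together, once the sequence is truly finished.
func (s *scheduler) removeSequence(i int) {
	s.seqs[i] = nil
	s.slots.Release(1)
}

func main() {
	s := &scheduler{slots: semaphore.NewWeighted(2), seqs: make([]*int, 2)}
	one := 1
	if err := s.add(context.Background(), &one); err != nil {
		fmt.Println("add failed:", err)
		return
	}
	fmt.Println("sequence holds its slot until removeSequence is called")
	s.removeSequence(0)
}
```

Tying the permit's lifetime to the sequence rather than to the handler is what lets the semaphore act as an admission limit on `s.seqs` occupancy instead of on in-flight HTTP handlers.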
diff --git a/llm/filetype.go b/llm/filetype.go
index 7a8e9f69..10f3d670 100644
--- a/llm/filetype.go
+++ b/llm/filetype.go
@@ -32,9 +32,10 @@ const (
     fileTypeIQ1_S
     fileTypeIQ4_NL
     fileTypeIQ3_S
+    fileTypeIQ3_M
     fileTypeIQ2_S
-    fileTypeIQ4_XS
     fileTypeIQ2_M
+    fileTypeIQ4_XS
     fileTypeIQ1_M
     fileTypeBF16

@@ -93,6 +94,8 @@ func ParseFileType(s string) (fileType, error) {
         return fileTypeIQ4_NL, nil
     case "IQ3_S":
         return fileTypeIQ3_S, nil
+    case "IQ3_M":
+        return fileTypeIQ3_M, nil
     case "IQ2_S":
         return fileTypeIQ2_S, nil
     case "IQ4_XS":
@@ -160,6 +163,8 @@ func (t fileType) String() string {
         return "IQ4_NL"
     case fileTypeIQ3_S:
         return "IQ3_S"
+    case fileTypeIQ3_M:
+        return "IQ3_M"
     case fileTypeIQ2_S:
         return "IQ2_S"
     case fileTypeIQ4_XS:
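The filetype.go hunks add IQ3_M and swap the IQ2_M/IQ4_XS positions. Because the constants are iota-based, their positions are their numeric IDs, so a new quantization type has to be inserted at the slot that matches the external numbering (this enum appears to mirror llama.cpp's file-type IDs) rather than appended at the end. A small self-contained sketch of the pattern and of the round trip the ParseFileType/String pair should preserve (illustrative only, not the package's actual code):

```go
package main

import (
	"fmt"
	"strings"
)

// fileType is a miniature iota enum: the position of each constant is its
// on-disk ID, so inserting a constant shifts everything after it by one.
type fileType uint32

const (
	fileTypeIQ3_S fileType = iota
	fileTypeIQ3_M // newly inserted between IQ3_S and IQ2_S
	fileTypeIQ2_S
	fileTypeIQ2_M
	fileTypeIQ4_XS
	fileTypeUnknown
)

func parseFileType(s string) (fileType, error) {
	switch strings.ToUpper(s) {
	case "IQ3_S":
		return fileTypeIQ3_S, nil
	case "IQ3_M":
		return fileTypeIQ3_M, nil
	case "IQ2_S":
		return fileTypeIQ2_S, nil
	case "IQ2_M":
		return fileTypeIQ2_M, nil
	case "IQ4_XS":
		return fileTypeIQ4_XS, nil
	}
	return fileTypeUnknown, fmt.Errorf("unknown file type %q", s)
}

func (t fileType) String() string {
	return [...]string{"IQ3_S", "IQ3_M", "IQ2_S", "IQ2_M", "IQ4_XS", "unknown"}[t]
}

func main() {
	// Round trip: every accepted name parses to a value whose String() is the same name.
	for _, name := range []string{"IQ3_S", "IQ3_M", "IQ2_S", "IQ2_M", "IQ4_XS"} {
		ft, err := parseFileType(name)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%-6s -> id %d -> %s\n", name, uint32(ft), ft)
	}
}
```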
diff --git a/llm/server.go b/llm/server.go
index d7c5198d..b2405905 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -687,7 +687,11 @@ type CompletionResponse struct {

 func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
     if err := s.sem.Acquire(ctx, 1); err != nil {
-        slog.Error("Failed to acquire semaphore", "error", err)
+        if errors.Is(err, context.Canceled) {
+            slog.Info("aborting completion request due to client closing the connection")
+        } else {
+            slog.Error("Failed to acquire semaphore", "error", err)
+        }
         return err
     }
     defer s.sem.Release(1)
@@ -865,7 +869,11 @@ type EmbeddingResponse struct {

 func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) {
     if err := s.sem.Acquire(ctx, 1); err != nil {
-        slog.Error("Failed to acquire semaphore", "error", err)
+        if errors.Is(err, context.Canceled) {
+            slog.Info("aborting embedding request due to client closing the connection")
+        } else {
+            slog.Error("Failed to acquire semaphore", "error", err)
+        }
         return nil, err
     }
     defer s.sem.Release(1)
diff --git a/server/images.go b/server/images.go
index 6a0e8ae3..1f6a9712 100644
--- a/server/images.go
+++ b/server/images.go
@@ -5,7 +5,6 @@ import (
     "cmp"
     "context"
     "crypto/sha256"
-    "encoding/base64"
     "encoding/hex"
     "encoding/json"
     "errors"
@@ -24,14 +23,12 @@ import (
     "strings"

     "github.com/ollama/ollama/api"
-    "github.com/ollama/ollama/auth"
     "github.com/ollama/ollama/envconfig"
     "github.com/ollama/ollama/format"
     "github.com/ollama/ollama/llama"
     "github.com/ollama/ollama/llm"
     "github.com/ollama/ollama/parser"
     "github.com/ollama/ollama/template"
-    "github.com/ollama/ollama/types/errtypes"
     "github.com/ollama/ollama/types/model"
     "github.com/ollama/ollama/version"
 )
@@ -985,37 +982,7 @@ func GetSHA256Digest(r io.Reader) (string, int64) {

 var errUnauthorized = errors.New("unauthorized: access denied")

-// getTokenSubject returns the subject of a JWT token, it does not validate the token
-func getTokenSubject(token string) string {
-    parts := strings.Split(token, ".")
-    if len(parts) != 3 {
-        return ""
-    }
-
-    payload := parts[1]
-    payloadBytes, err := base64.RawURLEncoding.DecodeString(payload)
-    if err != nil {
-        slog.Error(fmt.Sprintf("failed to decode jwt payload: %v", err))
-        return ""
-    }
-
-    var payloadMap map[string]interface{}
-    if err := json.Unmarshal(payloadBytes, &payloadMap); err != nil {
-        slog.Error(fmt.Sprintf("failed to unmarshal payload JSON: %v", err))
-        return ""
-    }
-
-    sub, ok := payloadMap["sub"]
-    if !ok {
-        slog.Error("jwt does not contain 'sub' field")
-        return ""
-    }
-
-    return fmt.Sprintf("%s", sub)
-}
-
 func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *registryOptions) (*http.Response, error) {
-    anonymous := true // access will default to anonymous if no user is found associated with the public key
     for range 2 {
         resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
         if err != nil {
             return nil, err
         }
@@ -1036,7 +1003,6 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
             if err != nil {
                 return nil, err
             }
-            anonymous = getTokenSubject(token) == "anonymous"
             regOpts.Token = token
             if body != nil {
                 _, err = body.Seek(0, io.SeekStart)
@@ -1059,16 +1025,6 @@
         }
     }

-    if anonymous {
-        // no user is associated with the public key, and the request requires non-anonymous access
-        pubKey, nestedErr := auth.GetPublicKey()
-        if nestedErr != nil {
-            slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
-            return nil, errUnauthorized
-        }
-        return nil, &errtypes.UnknownOllamaKey{Key: pubKey}
-    }
-
     // user is associated with the public key, but is not authorized to make the request
     return nil, errUnauthorized
 }
diff --git a/server/routes.go b/server/routes.go
index 5dfd6ffe..c13cd023 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1141,7 +1141,7 @@ func (s *Server) GenerateRoutes() http.Handler {
     config.AllowWildcard = true
     config.AllowBrowserExtensions = true
     config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
-    openAIProperties := []string{"lang", "package-version", "os", "arch", "runtime", "runtime-version", "async"}
+    openAIProperties := []string{"lang", "package-version", "os", "arch", "retry-count", "runtime", "runtime-version", "async"}
     for _, prop := range openAIProperties {
         config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
     }
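The routes.go change adds `retry-count` to the per-property list, which the loop expands into `x-stainless-*` entries in the CORS allow list. A tiny sketch that only builds and prints the resulting header names, to make visible what browser-based OpenAI/Stainless SDK clients are now permitted to send (the slice literals copy the values from the hunk; the real server wires them into its CORS config rather than printing them):

```go
package main

import "fmt"

func main() {
	// Headers the server's CORS config already allowed.
	allowHeaders := []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}

	// Telemetry properties sent by Stainless-generated OpenAI SDKs;
	// "retry-count" is the entry this change adds.
	openAIProperties := []string{"lang", "package-version", "os", "arch", "retry-count", "runtime", "runtime-version", "async"}
	for _, prop := range openAIProperties {
		allowHeaders = append(allowHeaders, "x-stainless-"+prop)
	}

	for _, h := range allowHeaders {
		fmt.Println(h)
	}
}
```

Without `x-stainless-retry-count` in the allow list, a retried request from a browser-hosted SDK would be rejected by the CORS preflight even though the server itself accepts it.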