mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-22 06:43:57 +00:00
Merge branch 'ollama:main' into main
This commit is contained in:
13
README.md
13
README.md
@@ -316,7 +316,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
|
- [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
|
||||||
- [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
|
- [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
|
||||||
- [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
|
- [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
|
||||||
- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Chat with Code Repository)
|
- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Interactive chat tool that can leverage Ollama models for rapid understanding and navigation of GitHub code repositories)
|
||||||
- [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
|
- [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
|
||||||
- [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
|
- [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
|
||||||
- [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source Retrieval-Augmented Generation engine based on deep document understanding)
|
- [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source Retrieval-Augmented Generation engine based on deep document understanding)
|
||||||
@@ -326,6 +326,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG)
|
- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG)
|
||||||
- [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
|
- [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
|
||||||
- [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
|
- [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
|
||||||
|
- [RWKV-Runner](https://github.com/josStorer/RWKV-Runner) (RWKV offline LLM deployment tool, also usable as a client for ChatGPT and Ollama)
|
||||||
- [Ollama Grid Search](https://github.com/dezoito/ollama-grid-search) (app to evaluate and compare models)
|
- [Ollama Grid Search](https://github.com/dezoito/ollama-grid-search) (app to evaluate and compare models)
|
||||||
- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
|
- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
|
||||||
- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
|
- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
|
||||||
@@ -368,7 +369,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard and said in the meetings)
|
- [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard and said in the meetings)
|
||||||
- [Hexabot](https://github.com/hexastack/hexabot) (A conversational AI builder)
|
- [Hexabot](https://github.com/hexastack/hexabot) (A conversational AI builder)
|
||||||
- [Reddit Rate](https://github.com/rapidarchitect/reddit_analyzer) (Search and Rate Reddit topics with a weighted summation)
|
- [Reddit Rate](https://github.com/rapidarchitect/reddit_analyzer) (Search and Rate Reddit topics with a weighted summation)
|
||||||
- [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt)
|
- [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt) (Chrome Extension to manage open-source models supported by Ollama, create custom models, and chat with models from a user-friendly UI)
|
||||||
- [VT](https://github.com/vinhnx/vt.ai) (A minimal multimodal AI chat app, with dynamic conversation routing. Supports local models via Ollama)
|
- [VT](https://github.com/vinhnx/vt.ai) (A minimal multimodal AI chat app, with dynamic conversation routing. Supports local models via Ollama)
|
||||||
- [Nosia](https://github.com/nosia-ai/nosia) (Easy to install and use RAG platform based on Ollama)
|
- [Nosia](https://github.com/nosia-ai/nosia) (Easy to install and use RAG platform based on Ollama)
|
||||||
- [Witsy](https://github.com/nbonamy/witsy) (An AI Desktop application avaiable for Mac/Windows/Linux)
|
- [Witsy](https://github.com/nbonamy/witsy) (An AI Desktop application avaiable for Mac/Windows/Linux)
|
||||||
@@ -394,7 +395,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [Oatmeal](https://github.com/dustinblackman/oatmeal)
|
- [Oatmeal](https://github.com/dustinblackman/oatmeal)
|
||||||
- [cmdh](https://github.com/pgibler/cmdh)
|
- [cmdh](https://github.com/pgibler/cmdh)
|
||||||
- [ooo](https://github.com/npahlfer/ooo)
|
- [ooo](https://github.com/npahlfer/ooo)
|
||||||
- [shell-pilot](https://github.com/reid41/shell-pilot)
|
- [shell-pilot](https://github.com/reid41/shell-pilot)(Interact with models via pure shell scripts on Linux or macOS)
|
||||||
- [tenere](https://github.com/pythops/tenere)
|
- [tenere](https://github.com/pythops/tenere)
|
||||||
- [llm-ollama](https://github.com/taketwo/llm-ollama) for [Datasette's LLM CLI](https://llm.datasette.io/en/stable/).
|
- [llm-ollama](https://github.com/taketwo/llm-ollama) for [Datasette's LLM CLI](https://llm.datasette.io/en/stable/).
|
||||||
- [typechat-cli](https://github.com/anaisbetts/typechat-cli)
|
- [typechat-cli](https://github.com/anaisbetts/typechat-cli)
|
||||||
@@ -410,6 +411,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [bb7](https://github.com/drunkwcodes/bb7)
|
- [bb7](https://github.com/drunkwcodes/bb7)
|
||||||
- [SwollamaCLI](https://github.com/marcusziade/Swollama) bundled with the Swollama Swift package. [Demo](https://github.com/marcusziade/Swollama?tab=readme-ov-file#cli-usage)
|
- [SwollamaCLI](https://github.com/marcusziade/Swollama) bundled with the Swollama Swift package. [Demo](https://github.com/marcusziade/Swollama?tab=readme-ov-file#cli-usage)
|
||||||
- [aichat](https://github.com/sigoden/aichat) All-in-one LLM CLI tool featuring Shell Assistant, Chat-REPL, RAG, AI tools & agents, with access to OpenAI, Claude, Gemini, Ollama, Groq, and more.
|
- [aichat](https://github.com/sigoden/aichat) All-in-one LLM CLI tool featuring Shell Assistant, Chat-REPL, RAG, AI tools & agents, with access to OpenAI, Claude, Gemini, Ollama, Groq, and more.
|
||||||
|
- [PowershAI](https://github.com/rrg92/powershai) PowerShell module that brings AI to terminal on Windows, including support for Ollama
|
||||||
- [orbiton](https://github.com/xyproto/orbiton) Configuration-free text editor and IDE with support for tab completion with Ollama.
|
- [orbiton](https://github.com/xyproto/orbiton) Configuration-free text editor and IDE with support for tab completion with Ollama.
|
||||||
|
|
||||||
### Apple Vision Pro
|
### Apple Vision Pro
|
||||||
@@ -514,6 +516,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
|
- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
|
||||||
- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
|
- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
|
||||||
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
|
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
|
||||||
|
- [ChatGPTBox: All in one browser extension](https://github.com/josStorer/chatGPTBox) with [Integrating Tutorial](https://github.com/josStorer/chatGPTBox/issues/616#issuecomment-1975186467)
|
||||||
- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
|
- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
|
||||||
- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
|
- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
|
||||||
- [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front end Open WebUI service.)
|
- [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front end Open WebUI service.)
|
||||||
@@ -529,3 +532,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
### Supported backends
|
### Supported backends
|
||||||
|
|
||||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
|
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
|
||||||
|
|
||||||
|
### Observability
|
||||||
|
|
||||||
|
- [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
|
||||||
|
|||||||
57
cmd/cmd.go
57
cmd/cmd.go
@@ -19,7 +19,6 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -35,14 +34,11 @@ import (
|
|||||||
"golang.org/x/term"
|
"golang.org/x/term"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/auth"
|
|
||||||
"github.com/ollama/ollama/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/parser"
|
"github.com/ollama/ollama/parser"
|
||||||
"github.com/ollama/ollama/progress"
|
"github.com/ollama/ollama/progress"
|
||||||
"github.com/ollama/ollama/server"
|
"github.com/ollama/ollama/server"
|
||||||
"github.com/ollama/ollama/types/errtypes"
|
|
||||||
"github.com/ollama/ollama/types/model"
|
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -456,6 +452,10 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
|||||||
if len(prompts) > 0 {
|
if len(prompts) > 0 {
|
||||||
interactive = false
|
interactive = false
|
||||||
}
|
}
|
||||||
|
// Be quiet if we're redirecting to a pipe or file
|
||||||
|
if !term.IsTerminal(int(os.Stdout.Fd())) {
|
||||||
|
interactive = false
|
||||||
|
}
|
||||||
|
|
||||||
nowrap, err := cmd.Flags().GetBool("nowordwrap")
|
nowrap, err := cmd.Flags().GetBool("nowordwrap")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -512,47 +512,6 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
|||||||
return generate(cmd, opts)
|
return generate(cmd, opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
func errFromUnknownKey(unknownKeyErr error) error {
|
|
||||||
// find SSH public key in the error message
|
|
||||||
sshKeyPattern := `ssh-\w+ [^\s"]+`
|
|
||||||
re := regexp.MustCompile(sshKeyPattern)
|
|
||||||
matches := re.FindStringSubmatch(unknownKeyErr.Error())
|
|
||||||
|
|
||||||
if len(matches) > 0 {
|
|
||||||
serverPubKey := matches[0]
|
|
||||||
|
|
||||||
localPubKey, err := auth.GetPublicKey()
|
|
||||||
if err != nil {
|
|
||||||
return unknownKeyErr
|
|
||||||
}
|
|
||||||
|
|
||||||
if runtime.GOOS == "linux" && serverPubKey != localPubKey {
|
|
||||||
// try the ollama service public key
|
|
||||||
svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
|
|
||||||
if err != nil {
|
|
||||||
return unknownKeyErr
|
|
||||||
}
|
|
||||||
localPubKey = strings.TrimSpace(string(svcPubKey))
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if the returned public key matches the local public key, this prevents adding a remote key to the user's account
|
|
||||||
if serverPubKey != localPubKey {
|
|
||||||
return unknownKeyErr
|
|
||||||
}
|
|
||||||
|
|
||||||
var msg strings.Builder
|
|
||||||
msg.WriteString(unknownKeyErr.Error())
|
|
||||||
msg.WriteString("\n\nYour ollama key is:\n")
|
|
||||||
msg.WriteString(localPubKey)
|
|
||||||
msg.WriteString("\nAdd your key at:\n")
|
|
||||||
msg.WriteString("https://ollama.com/settings/keys")
|
|
||||||
|
|
||||||
return errors.New(msg.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
return unknownKeyErr
|
|
||||||
}
|
|
||||||
|
|
||||||
func PushHandler(cmd *cobra.Command, args []string) error {
|
func PushHandler(cmd *cobra.Command, args []string) error {
|
||||||
client, err := api.ClientFromEnvironment()
|
client, err := api.ClientFromEnvironment()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -606,14 +565,6 @@ func PushHandler(cmd *cobra.Command, args []string) error {
|
|||||||
if strings.Contains(err.Error(), "access denied") {
|
if strings.Contains(err.Error(), "access denied") {
|
||||||
return errors.New("you are not authorized to push to this namespace, create the model under a namespace you own")
|
return errors.New("you are not authorized to push to this namespace, create the model under a namespace you own")
|
||||||
}
|
}
|
||||||
host := model.ParseName(args[0]).Host
|
|
||||||
isOllamaHost := strings.HasSuffix(host, ".ollama.ai") || strings.HasSuffix(host, ".ollama.com")
|
|
||||||
if strings.Contains(err.Error(), errtypes.UnknownOllamaKeyErrMsg) && isOllamaHost {
|
|
||||||
// the user has not added their ollama key to ollama.com
|
|
||||||
// re-throw an error with a more user-friendly message
|
|
||||||
return errFromUnknownKey(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from langchain.llms import Ollama
|
from langchain.llms import Ollama
|
||||||
|
|
||||||
input = input("What is your question?")
|
input = input("What is your question?\n> ")
|
||||||
llm = Ollama(model="llama3.2")
|
llm = Ollama(model="llama3.2")
|
||||||
res = llm.predict(input)
|
res = llm.invoke(input)
|
||||||
print (res)
|
print (res)
|
||||||
|
|||||||
2
go.mod
2
go.mod
@@ -29,7 +29,7 @@ require (
|
|||||||
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
|
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
|
||||||
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
||||||
github.com/chewxy/hm v1.0.0 // indirect
|
github.com/chewxy/hm v1.0.0 // indirect
|
||||||
github.com/chewxy/math32 v1.10.1 // indirect
|
github.com/chewxy/math32 v1.11.0 // indirect
|
||||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||||
github.com/cloudwego/iasm v0.2.0 // indirect
|
github.com/cloudwego/iasm v0.2.0 // indirect
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
|
|||||||
10
go.sum
10
go.sum
@@ -21,8 +21,8 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA
|
|||||||
github.com/chewxy/hm v1.0.0 h1:zy/TSv3LV2nD3dwUEQL2VhXeoXbb9QkpmdRAVUFiA6k=
|
github.com/chewxy/hm v1.0.0 h1:zy/TSv3LV2nD3dwUEQL2VhXeoXbb9QkpmdRAVUFiA6k=
|
||||||
github.com/chewxy/hm v1.0.0/go.mod h1:qg9YI4q6Fkj/whwHR1D+bOGeF7SniIP40VweVepLjg0=
|
github.com/chewxy/hm v1.0.0/go.mod h1:qg9YI4q6Fkj/whwHR1D+bOGeF7SniIP40VweVepLjg0=
|
||||||
github.com/chewxy/math32 v1.0.0/go.mod h1:Miac6hA1ohdDUTagnvJy/q+aNnEk16qWUdb8ZVhvCN0=
|
github.com/chewxy/math32 v1.0.0/go.mod h1:Miac6hA1ohdDUTagnvJy/q+aNnEk16qWUdb8ZVhvCN0=
|
||||||
github.com/chewxy/math32 v1.10.1 h1:LFpeY0SLJXeaiej/eIp2L40VYfscTvKh/FSEZ68uMkU=
|
github.com/chewxy/math32 v1.11.0 h1:8sek2JWqeaKkVnHa7bPVqCEOUPbARo4SGxs6toKyAOo=
|
||||||
github.com/chewxy/math32 v1.10.1/go.mod h1:dOB2rcuFrCn6UHrze36WSLVPKtzPMRAQvBvUwkSsLqs=
|
github.com/chewxy/math32 v1.11.0/go.mod h1:dOB2rcuFrCn6UHrze36WSLVPKtzPMRAQvBvUwkSsLqs=
|
||||||
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
||||||
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
|
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
|
||||||
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
|
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
|
||||||
@@ -231,8 +231,6 @@ golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+o
|
|||||||
golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||||
golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||||
golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||||
golang.org/x/image v0.14.0 h1:tNgSxAFe3jC4uYqvZdTr84SZoM1KfwdC9SKIFrLjFn4=
|
|
||||||
golang.org/x/image v0.14.0/go.mod h1:HUYqC05R2ZcZ3ejNQsIHQDQiwWM4JBqmm6MKANTp4LE=
|
|
||||||
golang.org/x/image v0.22.0 h1:UtK5yLUzilVrkjMAZAZ34DXGpASN8i8pj8g+O+yd10g=
|
golang.org/x/image v0.22.0 h1:UtK5yLUzilVrkjMAZAZ34DXGpASN8i8pj8g+O+yd10g=
|
||||||
golang.org/x/image v0.22.0/go.mod h1:9hPFhljd4zZ1GNSIZJ49sqbp45GKK9t6w+iXvGqZUz4=
|
golang.org/x/image v0.22.0/go.mod h1:9hPFhljd4zZ1GNSIZJ49sqbp45GKK9t6w+iXvGqZUz4=
|
||||||
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
||||||
@@ -268,8 +266,6 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
|
|||||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
|
|
||||||
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
|
||||||
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
|
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
|
||||||
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
@@ -296,8 +292,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
|||||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
|
|
||||||
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
|
||||||
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
|
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
|
||||||
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
|
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
|
||||||
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ import (
|
|||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/envconfig"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestMaxQueue(t *testing.T) {
|
func TestMaxQueue(t *testing.T) {
|
||||||
@@ -27,12 +26,8 @@ func TestMaxQueue(t *testing.T) {
|
|||||||
|
|
||||||
// Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU
|
// Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU
|
||||||
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
|
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
|
||||||
threadCount := 32
|
threadCount := 16
|
||||||
if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
|
|
||||||
threadCount = int(maxQueue)
|
|
||||||
} else {
|
|
||||||
t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
|
t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
|
||||||
}
|
|
||||||
|
|
||||||
req := api.GenerateRequest{
|
req := api.GenerateRequest{
|
||||||
Model: "orca-mini",
|
Model: "orca-mini",
|
||||||
|
|||||||
@@ -300,6 +300,7 @@ func (s *Server) removeSequence(seqIndex int, reason string) {
|
|||||||
close(seq.embedding)
|
close(seq.embedding)
|
||||||
seq.cache.InUse = false
|
seq.cache.InUse = false
|
||||||
s.seqs[seqIndex] = nil
|
s.seqs[seqIndex] = nil
|
||||||
|
s.seqsSem.Release(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) run(ctx context.Context) {
|
func (s *Server) run(ctx context.Context) {
|
||||||
@@ -649,14 +650,18 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure that a place to put the sequence is available
|
// Ensure there is a place to put the sequence, released when removed from s.seqs
|
||||||
if err := s.seqsSem.Acquire(r.Context(), 1); err != nil {
|
if err := s.seqsSem.Acquire(r.Context(), 1); err != nil {
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
slog.Info("aborting completion request due to client closing the connection")
|
||||||
|
} else {
|
||||||
slog.Error("Failed to acquire semaphore", "error", err)
|
slog.Error("Failed to acquire semaphore", "error", err)
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer s.seqsSem.Release(1)
|
|
||||||
|
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
|
found := false
|
||||||
for i, sq := range s.seqs {
|
for i, sq := range s.seqs {
|
||||||
if sq == nil {
|
if sq == nil {
|
||||||
seq.cache, seq.inputs, err = s.cache.LoadCacheSlot(seq.inputs, req.CachePrompt)
|
seq.cache, seq.inputs, err = s.cache.LoadCacheSlot(seq.inputs, req.CachePrompt)
|
||||||
@@ -670,11 +675,17 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
s.seqs[i] = seq
|
s.seqs[i] = seq
|
||||||
s.cond.Signal()
|
s.cond.Signal()
|
||||||
|
found = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
|
if !found {
|
||||||
|
http.Error(w, "could not find an available sequence", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-r.Context().Done():
|
case <-r.Context().Done():
|
||||||
@@ -738,14 +749,18 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure that a place to put the sequence is available
|
// Ensure there is a place to put the sequence, released when removed from s.seqs
|
||||||
if err := s.seqsSem.Acquire(r.Context(), 1); err != nil {
|
if err := s.seqsSem.Acquire(r.Context(), 1); err != nil {
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
slog.Info("aborting embeddings request due to client closing the connection")
|
||||||
|
} else {
|
||||||
slog.Error("Failed to acquire semaphore", "error", err)
|
slog.Error("Failed to acquire semaphore", "error", err)
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer s.seqsSem.Release(1)
|
|
||||||
|
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
|
found := false
|
||||||
for i, sq := range s.seqs {
|
for i, sq := range s.seqs {
|
||||||
if sq == nil {
|
if sq == nil {
|
||||||
seq.cache, seq.inputs, err = s.cache.LoadCacheSlot(seq.inputs, req.CachePrompt)
|
seq.cache, seq.inputs, err = s.cache.LoadCacheSlot(seq.inputs, req.CachePrompt)
|
||||||
@@ -756,11 +771,17 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
s.seqs[i] = seq
|
s.seqs[i] = seq
|
||||||
s.cond.Signal()
|
s.cond.Signal()
|
||||||
|
found = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
|
if !found {
|
||||||
|
http.Error(w, "could not find an available sequence", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
embedding := <-seq.embedding
|
embedding := <-seq.embedding
|
||||||
|
|
||||||
if err := json.NewEncoder(w).Encode(&EmbeddingResponse{
|
if err := json.NewEncoder(w).Encode(&EmbeddingResponse{
|
||||||
|
|||||||
@@ -32,9 +32,10 @@ const (
|
|||||||
fileTypeIQ1_S
|
fileTypeIQ1_S
|
||||||
fileTypeIQ4_NL
|
fileTypeIQ4_NL
|
||||||
fileTypeIQ3_S
|
fileTypeIQ3_S
|
||||||
|
fileTypeIQ3_M
|
||||||
fileTypeIQ2_S
|
fileTypeIQ2_S
|
||||||
fileTypeIQ4_XS
|
|
||||||
fileTypeIQ2_M
|
fileTypeIQ2_M
|
||||||
|
fileTypeIQ4_XS
|
||||||
fileTypeIQ1_M
|
fileTypeIQ1_M
|
||||||
fileTypeBF16
|
fileTypeBF16
|
||||||
|
|
||||||
@@ -93,6 +94,8 @@ func ParseFileType(s string) (fileType, error) {
|
|||||||
return fileTypeIQ4_NL, nil
|
return fileTypeIQ4_NL, nil
|
||||||
case "IQ3_S":
|
case "IQ3_S":
|
||||||
return fileTypeIQ3_S, nil
|
return fileTypeIQ3_S, nil
|
||||||
|
case "IQ3_M":
|
||||||
|
return fileTypeIQ3_M, nil
|
||||||
case "IQ2_S":
|
case "IQ2_S":
|
||||||
return fileTypeIQ2_S, nil
|
return fileTypeIQ2_S, nil
|
||||||
case "IQ4_XS":
|
case "IQ4_XS":
|
||||||
@@ -160,6 +163,8 @@ func (t fileType) String() string {
|
|||||||
return "IQ4_NL"
|
return "IQ4_NL"
|
||||||
case fileTypeIQ3_S:
|
case fileTypeIQ3_S:
|
||||||
return "IQ3_S"
|
return "IQ3_S"
|
||||||
|
case fileTypeIQ3_M:
|
||||||
|
return "IQ3_M"
|
||||||
case fileTypeIQ2_S:
|
case fileTypeIQ2_S:
|
||||||
return "IQ2_S"
|
return "IQ2_S"
|
||||||
case fileTypeIQ4_XS:
|
case fileTypeIQ4_XS:
|
||||||
|
|||||||
@@ -687,7 +687,11 @@ type CompletionResponse struct {
|
|||||||
|
|
||||||
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
|
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
|
||||||
if err := s.sem.Acquire(ctx, 1); err != nil {
|
if err := s.sem.Acquire(ctx, 1); err != nil {
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
slog.Info("aborting completion request due to client closing the connection")
|
||||||
|
} else {
|
||||||
slog.Error("Failed to acquire semaphore", "error", err)
|
slog.Error("Failed to acquire semaphore", "error", err)
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer s.sem.Release(1)
|
defer s.sem.Release(1)
|
||||||
@@ -865,7 +869,11 @@ type EmbeddingResponse struct {
|
|||||||
|
|
||||||
func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) {
|
func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) {
|
||||||
if err := s.sem.Acquire(ctx, 1); err != nil {
|
if err := s.sem.Acquire(ctx, 1); err != nil {
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
slog.Info("aborting embedding request due to client closing the connection")
|
||||||
|
} else {
|
||||||
slog.Error("Failed to acquire semaphore", "error", err)
|
slog.Error("Failed to acquire semaphore", "error", err)
|
||||||
|
}
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer s.sem.Release(1)
|
defer s.sem.Release(1)
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import (
|
|||||||
"cmp"
|
"cmp"
|
||||||
"context"
|
"context"
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
"encoding/base64"
|
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
@@ -24,14 +23,12 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/auth"
|
|
||||||
"github.com/ollama/ollama/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/llama"
|
"github.com/ollama/ollama/llama"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
"github.com/ollama/ollama/parser"
|
"github.com/ollama/ollama/parser"
|
||||||
"github.com/ollama/ollama/template"
|
"github.com/ollama/ollama/template"
|
||||||
"github.com/ollama/ollama/types/errtypes"
|
|
||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
)
|
)
|
||||||
@@ -985,37 +982,7 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
|
|||||||
|
|
||||||
var errUnauthorized = errors.New("unauthorized: access denied")
|
var errUnauthorized = errors.New("unauthorized: access denied")
|
||||||
|
|
||||||
// getTokenSubject returns the subject of a JWT token, it does not validate the token
|
|
||||||
func getTokenSubject(token string) string {
|
|
||||||
parts := strings.Split(token, ".")
|
|
||||||
if len(parts) != 3 {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
payload := parts[1]
|
|
||||||
payloadBytes, err := base64.RawURLEncoding.DecodeString(payload)
|
|
||||||
if err != nil {
|
|
||||||
slog.Error(fmt.Sprintf("failed to decode jwt payload: %v", err))
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
var payloadMap map[string]interface{}
|
|
||||||
if err := json.Unmarshal(payloadBytes, &payloadMap); err != nil {
|
|
||||||
slog.Error(fmt.Sprintf("failed to unmarshal payload JSON: %v", err))
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
sub, ok := payloadMap["sub"]
|
|
||||||
if !ok {
|
|
||||||
slog.Error("jwt does not contain 'sub' field")
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
return fmt.Sprintf("%s", sub)
|
|
||||||
}
|
|
||||||
|
|
||||||
func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *registryOptions) (*http.Response, error) {
|
func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *registryOptions) (*http.Response, error) {
|
||||||
anonymous := true // access will default to anonymous if no user is found associated with the public key
|
|
||||||
for range 2 {
|
for range 2 {
|
||||||
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
|
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -1036,7 +1003,6 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
anonymous = getTokenSubject(token) == "anonymous"
|
|
||||||
regOpts.Token = token
|
regOpts.Token = token
|
||||||
if body != nil {
|
if body != nil {
|
||||||
_, err = body.Seek(0, io.SeekStart)
|
_, err = body.Seek(0, io.SeekStart)
|
||||||
@@ -1059,16 +1025,6 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if anonymous {
|
|
||||||
// no user is associated with the public key, and the request requires non-anonymous access
|
|
||||||
pubKey, nestedErr := auth.GetPublicKey()
|
|
||||||
if nestedErr != nil {
|
|
||||||
slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
|
|
||||||
return nil, errUnauthorized
|
|
||||||
}
|
|
||||||
return nil, &errtypes.UnknownOllamaKey{Key: pubKey}
|
|
||||||
}
|
|
||||||
// user is associated with the public key, but is not authorized to make the request
|
|
||||||
return nil, errUnauthorized
|
return nil, errUnauthorized
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1141,7 +1141,7 @@ func (s *Server) GenerateRoutes() http.Handler {
|
|||||||
config.AllowWildcard = true
|
config.AllowWildcard = true
|
||||||
config.AllowBrowserExtensions = true
|
config.AllowBrowserExtensions = true
|
||||||
config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
|
config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
|
||||||
openAIProperties := []string{"lang", "package-version", "os", "arch", "runtime", "runtime-version", "async"}
|
openAIProperties := []string{"lang", "package-version", "os", "arch", "retry-count", "runtime", "runtime-version", "async"}
|
||||||
for _, prop := range openAIProperties {
|
for _, prop := range openAIProperties {
|
||||||
config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
|
config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user