Mirror of https://github.com/likelovewant/ollama-for-amd.git (synced 2025-12-22 06:43:57 +00:00)

Commit: Merge branch 'ollama:main' into main
@@ -370,12 +370,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
 - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
 - [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
-- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
 - [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support)
 - [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
 - [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
 - [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
+- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
+- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
 - [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
@@ -385,3 +386,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
 
 ### Supported backends
 - [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
 
@@ -5,12 +5,14 @@ import (
 "log/slog"
 "os"
 "path/filepath"
+
+"github.com/ollama/ollama/server/envconfig"
 )
 
 func InitLogging() {
 level := slog.LevelInfo
 
-if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
+if envconfig.Debug {
 level = slog.LevelDebug
 }
 
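Note: this hunk is one instance of a change repeated throughout the commit: ad-hoc `os.Getenv("OLLAMA_DEBUG")` checks are replaced with the shared `server/envconfig` package. From the user's side the knob should stay the same; as a minimal sketch (assuming, as the removed code did, that any non-empty value enables debug logging):

```shell
# Enable debug-level logging for a manually started server (illustrative value).
OLLAMA_DEBUG=1 ollama serve
```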
@@ -31,16 +31,13 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 "/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd
 "/FORCECLOSEAPPLICATIONS", // Force close the tray app - might be needed
 }
-// When we're not in debug mode, make the upgrade as quiet as possible (no GUI, no prompts)
-// TODO - temporarily disable since we're pinning in debug mode for the preview
-// if debug := os.Getenv("OLLAMA_DEBUG"); debug == "" {
+// make the upgrade as quiet as possible (no GUI, no prompts)
 installArgs = append(installArgs,
 "/SP", // Skip the "This will install... Do you wish to continue" prompt
 "/SUPPRESSMSGBOXES",
 "/SILENT",
 "/VERYSILENT",
 )
-// }
 
 // Safeguard in case we have requests in flight that need to drain...
 slog.Info("Waiting for server to shutdown")
cmd/cmd.go (19 changed lines)
@@ -34,7 +34,6 @@ import (
 "github.com/ollama/ollama/api"
 "github.com/ollama/ollama/auth"
 "github.com/ollama/ollama/format"
-"github.com/ollama/ollama/parser"
 "github.com/ollama/ollama/progress"
 "github.com/ollama/ollama/server"
 "github.com/ollama/ollama/types/errtypes"
@@ -57,13 +56,13 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 p := progress.NewProgress(os.Stderr)
 defer p.Stop()
 
-modelfile, err := os.Open(filename)
+f, err := os.Open(filename)
 if err != nil {
 return err
 }
-defer modelfile.Close()
+defer f.Close()
 
-commands, err := parser.Parse(modelfile)
+modelfile, err := model.ParseFile(f)
 if err != nil {
 return err
 }
@@ -77,10 +76,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 spinner := progress.NewSpinner(status)
 p.Add(status, spinner)
 
-for i := range commands {
-switch commands[i].Name {
+for i := range modelfile.Commands {
+switch modelfile.Commands[i].Name {
 case "model", "adapter":
-path := commands[i].Args
+path := modelfile.Commands[i].Args
 if path == "~" {
 path = home
 } else if strings.HasPrefix(path, "~/") {
@@ -92,7 +91,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 }
 
 fi, err := os.Stat(path)
-if errors.Is(err, os.ErrNotExist) && commands[i].Name == "model" {
+if errors.Is(err, os.ErrNotExist) && modelfile.Commands[i].Name == "model" {
 continue
 } else if err != nil {
 return err
@@ -115,7 +114,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 return err
 }
 
-commands[i].Args = "@"+digest
+modelfile.Commands[i].Args = "@" + digest
 }
 }
 
@@ -145,7 +144,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 
 quantization, _ := cmd.Flags().GetString("quantization")
 
-request := api.CreateRequest{Name: args[0], Modelfile: parser.Format(commands), Quantization: quantization}
+request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantization: quantization}
 if err := client.Create(cmd.Context(), &request, fn); err != nil {
 return err
 }
@@ -53,7 +53,7 @@ func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Ten
 var err error
 t, offset, err = m.readTensors(f, offset, params)
 if err != nil {
-slog.Error("%v", err)
+slog.Error(err.Error())
 return nil, err
 }
 tensors = append(tensors, t...)
@@ -122,7 +122,7 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 
 ggufName, err := m.GetLayerName(k)
 if err != nil {
-slog.Error("%v", err)
+slog.Error(err.Error())
 return nil, 0, err
 }
 
@@ -74,7 +74,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
 
 ggufName, err := tf.GetLayerName(k.(string))
 if err != nil {
-slog.Error("%v", err)
+slog.Error(err.Error())
 return nil, err
 }
 slog.Debug(fmt.Sprintf("finding name for '%s' -> '%s'", k.(string), ggufName))
docs/api.md (58 changed lines)
@@ -17,7 +17,7 @@
 
 ### Model names
 
-Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
+Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
 
 ### Durations
 
@@ -66,7 +66,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-"model": "llama2",
+"model": "llama3",
 "prompt": "Why is the sky blue?"
 }'
 ```
@@ -77,7 +77,7 @@ A stream of JSON objects is returned:
 
 ```json
 {
-"model": "llama2",
+"model": "llama3",
 "created_at": "2023-08-04T08:52:19.385406455-07:00",
 "response": "The",
 "done": false
@@ -99,7 +99,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
 
 ```json
 {
-"model": "llama2",
+"model": "llama3",
 "created_at": "2023-08-04T19:22:45.499127Z",
 "response": "",
 "done": true,
@@ -121,7 +121,7 @@ A response can be received in one reply when streaming is off.
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-"model": "llama2",
+"model": "llama3",
 "prompt": "Why is the sky blue?",
 "stream": false
 }'
@@ -133,7 +133,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
 
 ```json
 {
-"model": "llama2",
+"model": "llama3",
 "created_at": "2023-08-04T19:22:45.499127Z",
 "response": "The sky is blue because it is the color of the sky.",
 "done": true,
@@ -155,7 +155,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-"model": "llama2",
+"model": "llama3",
 "prompt": "What color is the sky at different times of the day? Respond using JSON",
 "format": "json",
 "stream": false
@@ -166,7 +166,7 @@ curl http://localhost:11434/api/generate -d '{
 
 ```json
 {
-"model": "llama2",
+"model": "llama3",
 "created_at": "2023-11-09T21:07:55.186497Z",
 "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
 "done": true,
@@ -289,7 +289,7 @@ If you want to set custom options for the model at runtime rather than in the Mo
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-"model": "llama2",
+"model": "llama3",
 "prompt": "Why is the sky blue?",
 "stream": false,
 "options": {
@@ -332,7 +332,7 @@ curl http://localhost:11434/api/generate -d '{
 
 ```json
 {
-"model": "llama2",
+"model": "llama3",
 "created_at": "2023-08-04T19:22:45.499127Z",
 "response": "The sky is blue because it is the color of the sky.",
 "done": true,
@@ -354,7 +354,7 @@ If an empty prompt is provided, the model will be loaded into memory.
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-"model": "llama2"
+"model": "llama3"
 }'
 ```
 
@@ -364,7 +364,7 @@ A single JSON object is returned:
 
 ```json
 {
-"model": "llama2",
+"model": "llama3",
 "created_at": "2023-12-18T19:52:07.071755Z",
 "response": "",
 "done": true
@@ -407,7 +407,7 @@ Send a chat message with a streaming response.
 
 ```shell
 curl http://localhost:11434/api/chat -d '{
-"model": "llama2",
+"model": "llama3",
 "messages": [
 {
 "role": "user",
@@ -423,7 +423,7 @@ A stream of JSON objects is returned:
 
 ```json
 {
-"model": "llama2",
+"model": "llama3",
 "created_at": "2023-08-04T08:52:19.385406455-07:00",
 "message": {
 "role": "assistant",
@@ -438,7 +438,7 @@ Final response:
 
 ```json
 {
-"model": "llama2",
+"model": "llama3",
 "created_at": "2023-08-04T19:22:45.499127Z",
 "done": true,
 "total_duration": 4883583458,
@@ -456,7 +456,7 @@ Final response:
 
 ```shell
 curl http://localhost:11434/api/chat -d '{
-"model": "llama2",
+"model": "llama3",
 "messages": [
 {
 "role": "user",
@@ -471,7 +471,7 @@ curl http://localhost:11434/api/chat -d '{
 
 ```json
 {
-"model": "registry.ollama.ai/library/llama2:latest",
+"model": "registry.ollama.ai/library/llama3:latest",
 "created_at": "2023-12-12T14:13:43.416799Z",
 "message": {
 "role": "assistant",
@@ -495,7 +495,7 @@ Send a chat message with a conversation history. You can use this same approach
 
 ```shell
 curl http://localhost:11434/api/chat -d '{
-"model": "llama2",
+"model": "llama3",
 "messages": [
 {
 "role": "user",
@@ -519,7 +519,7 @@ A stream of JSON objects is returned:
 
 ```json
 {
-"model": "llama2",
+"model": "llama3",
 "created_at": "2023-08-04T08:52:19.385406455-07:00",
 "message": {
 "role": "assistant",
@@ -533,7 +533,7 @@ Final response:
 
 ```json
 {
-"model": "llama2",
+"model": "llama3",
 "created_at": "2023-08-04T19:22:45.499127Z",
 "done": true,
 "total_duration": 8113331500,
@@ -591,7 +591,7 @@ curl http://localhost:11434/api/chat -d '{
 
 ```shell
 curl http://localhost:11434/api/chat -d '{
-"model": "llama2",
+"model": "llama3",
 "messages": [
 {
 "role": "user",
@@ -609,7 +609,7 @@ curl http://localhost:11434/api/chat -d '{
 
 ```json
 {
-"model": "registry.ollama.ai/library/llama2:latest",
+"model": "registry.ollama.ai/library/llama3:latest",
 "created_at": "2023-12-12T14:13:43.416799Z",
 "message": {
 "role": "assistant",
@@ -651,7 +651,7 @@ Create a new model from a `Modelfile`.
 ```shell
 curl http://localhost:11434/api/create -d '{
 "name": "mario",
-"modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
+"modelfile": "FROM llama3\nSYSTEM You are mario from Super Mario Bros."
 }'
 ```
 
@@ -758,7 +758,7 @@ A single JSON object will be returned.
 }
 },
 {
-"name": "llama2:latest",
+"name": "llama3:latest",
 "modified_at": "2023-12-07T09:32:18.757212583-08:00",
 "size": 3825819519,
 "digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
@@ -792,7 +792,7 @@ Show information about a model including details, modelfile, template, parameter
 
 ```shell
 curl http://localhost:11434/api/show -d '{
-"name": "llama2"
+"name": "llama3"
 }'
 ```
 
@@ -827,8 +827,8 @@ Copy a model. Creates a model with another name from an existing model.
 
 ```shell
 curl http://localhost:11434/api/copy -d '{
-"source": "llama2",
-"destination": "llama2-backup"
+"source": "llama3",
+"destination": "llama3-backup"
 }'
 ```
 
@@ -854,7 +854,7 @@ Delete a model and its data.
 
 ```shell
 curl -X DELETE http://localhost:11434/api/delete -d '{
-"name": "llama2:13b"
+"name": "llama3:13b"
 }'
 ```
 
@@ -882,7 +882,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
 
 ```shell
 curl http://localhost:11434/api/pull -d '{
-"name": "llama2"
+"name": "llama3"
 }'
 ```
 
docs/faq.md (12 changed lines)
@@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter:
 
 ```
 curl http://localhost:11434/api/generate -d '{
-"model": "llama2",
+"model": "llama3",
 "prompt": "Why is the sky blue?",
 "options": {
 "num_ctx": 4096
@@ -221,14 +221,20 @@ The `keep_alive` parameter can be set to:
 
 For example, to preload a model and leave it in memory use:
 ```shell
-curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": -1}'
+curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": -1}'
 ```
 
 To unload the model and free up memory use:
 ```shell
-curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": 0}'
+curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": 0}'
 ```
 
 Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable.
 
 If you wish to override the `OLLAMA_KEEP_ALIVE` setting, use the `keep_alive` API parameter with the `/api/generate` or `/api/chat` API endpoints.
+
+## How do I manage the maximum number of requests the server can queue
+
+If too many requests are sent to the server, it will respond with a 503 error
+indicating the server is overloaded. You can adjust how many requests may be
+queue by setting `OLLAMA_MAX_QUEUE`
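The new FAQ entry above introduces `OLLAMA_MAX_QUEUE` without showing it in use. A minimal sketch, assuming it is set in the server's environment like the other `OLLAMA_*` settings in this FAQ (values are illustrative, not recommendations):

```shell
# Keep loaded models in memory for 24 hours and allow up to 512 queued requests.
# Both variables are read when the server starts.
OLLAMA_KEEP_ALIVE=24h OLLAMA_MAX_QUEUE=512 ollama serve
```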
@@ -10,7 +10,7 @@ A model file is the blueprint to create and share models with Ollama.
 - [Examples](#examples)
 - [Instructions](#instructions)
 - [FROM (Required)](#from-required)
-- [Build from llama2](#build-from-llama2)
+- [Build from llama3](#build-from-llama3)
 - [Build from a bin file](#build-from-a-bin-file)
 - [PARAMETER](#parameter)
 - [Valid Parameters and Values](#valid-parameters-and-values)
@@ -48,7 +48,7 @@ INSTRUCTION arguments
 An example of a `Modelfile` creating a mario blueprint:
 
 ```modelfile
-FROM llama2
+FROM llama3
 # sets the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
 # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
@@ -67,33 +67,25 @@ To use this:
 
 More examples are available in the [examples directory](../examples).
 
-### `Modelfile`s in [ollama.com/library][1]
-
-There are two ways to view `Modelfile`s underlying the models in [ollama.com/library][1]:
-
-- Option 1: view a details page from a model's tags page:
-1. Go to a particular model's tags (e.g. https://ollama.com/library/llama2/tags)
-2. Click on a tag (e.g. https://ollama.com/library/llama2:13b)
-3. Scroll down to "Layers"
-- Note: if the [`FROM` instruction](#from-required) is not present,
-it means the model was created from a local file
-- Option 2: use `ollama show` to print the `Modelfile` for any local models like so:
+To view the Modelfile of a given model, use the `ollama show --modelfile` command.
 
 ```bash
-> ollama show --modelfile llama2:13b
+> ollama show --modelfile llama3
 # Modelfile generated by "ollama show"
 # To build a new Modelfile based on this one, replace the FROM line with:
-# FROM llama2:13b
+# FROM llama3:latest
+FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
+TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
 
-FROM /root/.ollama/models/blobs/sha256:123abc
-TEMPLATE """[INST] {{ if .System }}<<SYS>>{{ .System }}<</SYS>>
+{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
 
-{{ end }}{{ .Prompt }} [/INST] """
-SYSTEM """"""
-PARAMETER stop [INST]
-PARAMETER stop [/INST]
-PARAMETER stop <<SYS>>
-PARAMETER stop <</SYS>>
+{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+{{ .Response }}<|eot_id|>"""
+PARAMETER stop "<|start_header_id|>"
+PARAMETER stop "<|end_header_id|>"
+PARAMETER stop "<|eot_id|>"
+PARAMETER stop "<|reserved_special_token"
 ```
 
 ## Instructions
@@ -106,10 +98,10 @@ The `FROM` instruction defines the base model to use when creating a model.
 FROM <model name>:<tag>
 ```
 
-#### Build from llama2
+#### Build from llama3
 
 ```modelfile
-FROM llama2
+FROM llama3
 ```
 
 A list of available base models:
@@ -25,7 +25,7 @@ chat_completion = client.chat.completions.create(
 'content': 'Say this is a test',
 }
 ],
-model='llama2',
+model='llama3',
 )
 ```
 
@@ -43,7 +43,7 @@ const openai = new OpenAI({
 
 const chatCompletion = await openai.chat.completions.create({
 messages: [{ role: 'user', content: 'Say this is a test' }],
-model: 'llama2',
+model: 'llama3',
 })
 ```
 
@@ -53,7 +53,7 @@ const chatCompletion = await openai.chat.completions.create({
 curl http://localhost:11434/v1/chat/completions \
 -H "Content-Type: application/json" \
 -d '{
-"model": "llama2",
+"model": "llama3",
 "messages": [
 {
 "role": "system",
@@ -113,7 +113,7 @@ curl http://localhost:11434/v1/chat/completions \
 Before using a model, pull it locally `ollama pull`:
 
 ```shell
-ollama pull llama2
+ollama pull llama3
 ```
 
 ### Default model names
@@ -121,7 +121,7 @@ ollama pull llama2
 For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name:
 
 ```
-ollama cp llama2 gpt-3.5-turbo
+ollama cp llama3 gpt-3.5-turbo
 ```
 
 Afterwards, this new model name can be specified the `model` field:
@@ -15,7 +15,7 @@ import { Ollama } from "langchain/llms/ollama";
 
 const ollama = new Ollama({
 baseUrl: "http://localhost:11434",
-model: "llama2",
+model: "llama3",
 });
 
 const answer = await ollama.invoke(`why is the sky blue?`);
@@ -23,7 +23,7 @@ const answer = await ollama.invoke(`why is the sky blue?`);
 console.log(answer);
 ```
 
-That will get us the same thing as if we ran `ollama run llama2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
+That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
 
 ```bash
 npm install cheerio
@@ -1,3 +1,4 @@
+<<<<<<< HEAD
 # Ollama Windows Preview
 
 Welcome to the Ollama Windows preview.
@@ -27,7 +28,7 @@ Logs will often be helpful in diagnosing the problem (see
 
 Here's a quick example showing API access from `powershell`
 ```powershell
-(Invoke-WebRequest -method POST -Body '{"model":"llama2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
+(Invoke-WebRequest -method POST -Body '{"model":"llama3", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
 ```
 
 ## Troubleshooting
@@ -45,3 +46,17 @@ the explorer window by hitting `<cmd>+R` and type in:
 - `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
 - `explorer %HOMEPATH%\.ollama` contains models and configuration
 - `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories
+
+
+## Standalone CLI
+
+The easiest way to install Ollama on Windows is to use the `OllamaSetup.exe`
+installer. It installs in your account without requiring Administrator rights.
+We update Ollama regularly to support the latest models, and this installer will
+help you keep up to date.
+
+If you'd like to install or integrate Ollama as a service, a standalone
+`ollama-windows-amd64.zip` zip file is available containing only the Ollama CLI
+and GPU library dependencies for Nvidia and AMD. This allows for embedding
+Ollama in existing applications, or running it as a system service via `ollama
+serve` with tools such as [NSSM](https://nssm.cc/).
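The added Windows section mentions running the standalone CLI as a system service with `ollama serve` and [NSSM](https://nssm.cc/) but gives no command line. A hedged sketch of what that might look like, using NSSM's generic `install <service> <program> [arguments]` form from an elevated prompt (the extraction path is hypothetical):

```shell
# Assumes ollama-windows-amd64.zip was extracted to C:\ollama (hypothetical path).
nssm install Ollama "C:\ollama\ollama.exe" serve
nssm start Ollama
```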
@@ -2,7 +2,7 @@
 
 When calling `ollama`, you can pass it a file to run all the prompts in the file, one after the other:
 
-`ollama run llama2 < sourcequestions.txt`
+`ollama run llama3 < sourcequestions.txt`
 
 This concept is used in the following example.
 
@@ -35,7 +35,7 @@ func main() {
 
 ctx := context.Background()
 req := &api.ChatRequest{
-Model: "llama2",
+Model: "llama3",
 Messages: messages,
 }
 
@@ -51,7 +51,7 @@ while True:
 template=template,
 )
 
-llm = Ollama(model="llama2:13b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
 qa_chain = RetrievalQA.from_chain_type(
 llm,
 retriever=vectorstore.as_retriever(),
@@ -1,12 +1,12 @@
-from langchain.llms import Ollama
-from langchain.document_loaders import WebBaseLoader
+from langchain_community.llms import Ollama
+from langchain_community.document_loaders import WebBaseLoader
 from langchain.chains.summarize import load_summarize_chain
 
 loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
 docs = loader.load()
 
-llm = Ollama(model="llama2")
+llm = Ollama(model="llama3")
 chain = load_summarize_chain(llm, chain_type="stuff")
 
-result = chain.run(docs)
+result = chain.invoke(docs)
 print(result)
@@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.
 
 ## Running the Example
 
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:
 
 ```bash
-ollama pull llama2
+ollama pull llama3
 ```
 
 2. Install the Python Requirements.
@@ -21,4 +21,3 @@ This example is a basic "hello world" of using LangChain with Ollama.
 ```bash
 python main.py
 ```
-
@@ -1,6 +1,6 @@
 from langchain.llms import Ollama
 
 input = input("What is your question?")
-llm = Ollama(model="llama2")
+llm = Ollama(model="llama3")
 res = llm.predict(input)
 print (res)
@@ -1,4 +1,4 @@
-FROM llama2
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from super mario bros, acting as an assistant.
@@ -2,12 +2,12 @@
 
 # Example character: Mario
 
-This example shows how to create a basic character using Llama2 as the base model.
+This example shows how to create a basic character using Llama3 as the base model.
 
 To run this example:
 
 1. Download the Modelfile
-2. `ollama pull llama2` to get the base model used in the model file.
+2. `ollama pull llama3` to get the base model used in the model file.
 3. `ollama create NAME -f ./Modelfile`
 4. `ollama run NAME`
 
@@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?"
 What the model file looks like:
 
 ```
-FROM llama2
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from Super Mario Bros, acting as an assistant.
@@ -2,7 +2,7 @@ import requests
 import json
 import random
 
-model = "llama2"
+model = "llama3"
 template = {
 "firstName": "",
 "lastName": "",
@@ -12,7 +12,7 @@ countries = [
 "France",
 ]
 country = random.choice(countries)
-model = "llama2"
+model = "llama3"
 
 prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
 
@@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran
 
 ## Running the Example
 
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:
 
 ```bash
-ollama pull llama2
+ollama pull llama3
 ```
 
 2. Install the Python Requirements.
@@ -2,7 +2,7 @@ import requests
 import json
 
 # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
-model = "llama2" # TODO: update this for whatever model you wish to use
+model = "llama3" # TODO: update this for whatever model you wish to use
 
 
 def chat(messages):
@@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam
 
 ## Running the Example
 
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:
 
 ```bash
-ollama pull llama2
+ollama pull llama3
 ```
 
 2. Install the Python Requirements.
@@ -4,10 +4,10 @@ This example demonstrates how one would create a set of 'mentors' you can have a
 
 ## Usage
 
-1. Add llama2 to have the mentors ask your questions:
+1. Add llama3 to have the mentors ask your questions:
 
 ```bash
-ollama pull llama2
+ollama pull llama3
 ```
 
 2. Install prerequisites:
@@ -15,7 +15,7 @@ async function characterGenerator() {
 ollama.setModel("stablebeluga2:70b-q4_K_M");
 const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. `);
 
-const thecontents = `FROM llama2\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;
+const thecontents = `FROM llama3\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;
 
 fs.writeFile(path.join(directory, 'Modelfile'), thecontents, (err: any) => {
 if (err) throw err;
@@ -1,6 +1,6 @@
 import * as readline from "readline";
 
-const model = "llama2";
+const model = "llama3";
 type Message = {
 role: "assistant" | "user" | "system";
 content: string;
@@ -12,6 +12,8 @@ import (
 "sync"
 "syscall"
 "time"
+
+"github.com/ollama/ollama/server/envconfig"
 )
 
 var (
@@ -24,45 +26,8 @@ func PayloadsDir() (string, error) {
 defer lock.Unlock()
 var err error
 if payloadsDir == "" {
-runnersDir := os.Getenv("OLLAMA_RUNNERS_DIR")
-// On Windows we do not carry the payloads inside the main executable
-if runtime.GOOS == "windows" && runnersDir == "" {
-appExe, err := os.Executable()
-if err != nil {
-slog.Error("failed to lookup executable path", "error", err)
-return "", err
-}
-
-cwd, err := os.Getwd()
-if err != nil {
-slog.Error("failed to lookup working directory", "error", err)
-return "", err
-}
-
-var paths []string
-for _, root := range []string{filepath.Dir(appExe), cwd} {
-paths = append(paths,
-filepath.Join(root),
-filepath.Join(root, "windows-"+runtime.GOARCH),
-filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
-)
-}
-
-// Try a few variations to improve developer experience when building from source in the local tree
-for _, p := range paths {
-candidate := filepath.Join(p, "ollama_runners")
-_, err := os.Stat(candidate)
-if err == nil {
-runnersDir = candidate
-break
-}
-}
-if runnersDir == "" {
-err = fmt.Errorf("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
-slog.Error("incomplete distribution", "error", err)
-return "", err
-}
-}
+runnersDir := envconfig.RunnersDir
 if runnersDir != "" {
 payloadsDir = runnersDir
 return payloadsDir, nil
@@ -70,7 +35,7 @@ func PayloadsDir() (string, error) {
 
 // The remainder only applies on non-windows where we still carry payloads in the main executable
 cleanupTmpDirs()
-tmpDir := os.Getenv("OLLAMA_TMPDIR")
+tmpDir := envconfig.TmpDir
 if tmpDir == "" {
 tmpDir, err = os.MkdirTemp("", "ollama")
 if err != nil {
@@ -133,7 +98,7 @@ func cleanupTmpDirs() {
 func Cleanup() {
 lock.Lock()
 defer lock.Unlock()
-runnersDir := os.Getenv("OLLAMA_RUNNERS_DIR")
+runnersDir := envconfig.RunnersDir
 if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
 // We want to fully clean up the tmpdir parent of the payloads dir
 tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
@@ -21,6 +21,7 @@ import (
 "unsafe"
 
 "github.com/ollama/ollama/format"
+"github.com/ollama/ollama/server/envconfig"
 )
 
 type handles struct {
@@ -268,7 +269,7 @@ func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
 }
 
 func getVerboseState() C.uint16_t {
-if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
+if envconfig.Debug {
 return C.uint16_t(1)
 }
 return C.uint16_t(0)
integration/max_queue_test.go (new file, 117 lines)
@@ -0,0 +1,117 @@
//go:build integration

package integration

import (
    "context"
    "errors"
    "fmt"
    "log/slog"
    "os"
    "strconv"
    "strings"
    "sync"
    "testing"
    "time"

    "github.com/ollama/ollama/api"
    "github.com/stretchr/testify/require"
)

func TestMaxQueue(t *testing.T) {
    // Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU
    // Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
    threadCount := 32
    mq := os.Getenv("OLLAMA_MAX_QUEUE")
    if mq != "" {
        var err error
        threadCount, err = strconv.Atoi(mq)
        require.NoError(t, err)
    } else {
        os.Setenv("OLLAMA_MAX_QUEUE", fmt.Sprintf("%d", threadCount))
    }

    req := api.GenerateRequest{
        Model: "orca-mini",
        Prompt: "write a long historical fiction story about christopher columbus. use at least 10 facts from his actual journey",
        Options: map[string]interface{}{
            "seed": 42,
            "temperature": 0.0,
        },
    }
    resp := []string{"explore", "discover", "ocean"}

    // CPU mode takes much longer at the limit with a large queue setting
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
    defer cancel()
    client, _, cleanup := InitServerConnection(ctx, t)
    defer cleanup()

    require.NoError(t, PullIfMissing(ctx, client, req.Model))

    // Context for the worker threads so we can shut them down
    // embedCtx, embedCancel := context.WithCancel(ctx)
    embedCtx := ctx

    var genwg sync.WaitGroup
    go func() {
        genwg.Add(1)
        defer genwg.Done()
        slog.Info("Starting generate request")
        DoGenerate(ctx, t, client, req, resp, 45*time.Second, 5*time.Second)
        slog.Info("generate completed")
    }()

    // Give the generate a chance to get started before we start hammering on embed requests
    time.Sleep(5 * time.Millisecond)

    threadCount += 10 // Add a few extra to ensure we push the queue past its limit
    busyCount := 0
    resetByPeerCount := 0
    canceledCount := 0
    succesCount := 0
    counterMu := sync.Mutex{}
    var embedwg sync.WaitGroup
    for i := 0; i < threadCount; i++ {
        go func(i int) {
            embedwg.Add(1)
            defer embedwg.Done()
            slog.Info("embed started", "id", i)
            embedReq := api.EmbeddingRequest{
                Model: req.Model,
                Prompt: req.Prompt,
                Options: req.Options,
            }
            // Fresh client for every request
            client, _ = GetTestEndpoint()

            resp, genErr := client.Embeddings(embedCtx, &embedReq)
            counterMu.Lock()
            defer counterMu.Unlock()
            switch {
            case genErr == nil:
                succesCount++
                require.Greater(t, len(resp.Embedding), 5) // somewhat arbitrary, but sufficient to be reasonable
            case errors.Is(genErr, context.Canceled):
                canceledCount++
            case strings.Contains(genErr.Error(), "busy"):
                busyCount++
            case strings.Contains(genErr.Error(), "connection reset by peer"):
                resetByPeerCount++
            default:
                require.NoError(t, genErr, "%d request failed", i)
            }

            slog.Info("embed finished", "id", i)
        }(i)
    }
    genwg.Wait()
    slog.Info("generate done, waiting for embeds")
    embedwg.Wait()

    require.Equal(t, resetByPeerCount, 0, "Connections reset by peer, have you updated your fd and socket limits?")
    require.True(t, busyCount > 0, "no requests hit busy error but some should have")
    require.True(t, canceledCount == 0, "no requests should have been canceled due to timeout")

    slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
}
llm/ext_server/server.cpp (vendored, 2 changed lines)
@@ -1186,8 +1186,6 @@ struct llama_server_context
 {"model", params.model_alias},
 {"tokens_predicted", slot.n_decoded},
 {"tokens_evaluated", slot.n_prompt_tokens},
-{"generation_settings", get_formated_generation(slot)},
-{"prompt", slot.prompt},
 {"truncated", slot.truncated},
 {"stopped_eos", slot.stopped_eos},
 {"stopped_word", slot.stopped_word},
@@ -3,12 +3,11 @@ package llm
 import (
 "fmt"
 "log/slog"
-"os"
-"strconv"
 
 "github.com/ollama/ollama/api"
 "github.com/ollama/ollama/format"
 "github.com/ollama/ollama/gpu"
+"github.com/ollama/ollama/server/envconfig"
 )
 
 // This algorithm looks for a complete fit to determine if we need to unload other models
@@ -50,15 +49,8 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 for _, info := range gpus {
 memoryAvailable += info.FreeMemory
 }
-userLimit := os.Getenv("OLLAMA_MAX_VRAM")
-if userLimit != "" {
-avail, err := strconv.ParseUint(userLimit, 10, 64)
-if err != nil {
-slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
-} else {
-slog.Info("user override memory limit", "OLLAMA_MAX_VRAM", avail, "actual", memoryAvailable)
-memoryAvailable = avail
-}
+if envconfig.MaxVRAM > 0 {
+memoryAvailable = envconfig.MaxVRAM
 }
 
 slog.Debug("evaluating", "library", gpus[0].Library, "gpu_count", len(gpus), "available", format.HumanBytes2(memoryAvailable))
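This hunk folds the `OLLAMA_MAX_VRAM` override into `envconfig.MaxVRAM`. Judging from the removed `strconv.ParseUint` call and the direct assignment to `memoryAvailable`, the variable takes a raw byte count; a sketch under that assumption (the value is illustrative):

```shell
# Cap the VRAM ollama plans against at 4 GiB, expressed in bytes per the parsing above.
OLLAMA_MAX_VRAM=4294967296 ollama serve
```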
llm/patches/05-clip-fix.diff (new file, 24 lines)
@@ -0,0 +1,24 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index e3c9bcd4..b43f892d 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -573,14 +573,16 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
 struct ggml_tensor * embeddings = inp;
 if (ctx->has_class_embedding) {
 embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size);
+ }
+ ggml_set_name(embeddings, "embeddings");
+ ggml_set_input(embeddings);
+
+ if (ctx->has_class_embedding) {
 embeddings = ggml_acc(ctx0, embeddings, model.class_embedding,
 embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0);
 embeddings = ggml_acc(ctx0, embeddings, inp,
 embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]);
 }
- ggml_set_name(embeddings, "embeddings");
- ggml_set_input(embeddings);
-

 struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_positions);
 ggml_set_name(positions, "positions");
llm/server.go

@@ -26,6 +26,7 @@ import (
     "github.com/ollama/ollama/api"
     "github.com/ollama/ollama/format"
     "github.com/ollama/ollama/gpu"
+    "github.com/ollama/ollama/server/envconfig"
 )
 
 type LlamaServer interface {
@@ -124,7 +125,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
     } else {
         servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
     }
-    demandLib := strings.Trim(os.Getenv("OLLAMA_LLM_LIBRARY"), "\"' ")
+    demandLib := envconfig.LLMLibrary
     if demandLib != "" {
         serverPath := availableServers[demandLib]
         if serverPath == "" {
@@ -145,7 +146,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
         "--batch-size", fmt.Sprintf("%d", opts.NumBatch),
         "--embedding",
     }
-    if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
+    if envconfig.Debug {
         params = append(params, "--log-format", "json")
     } else {
         params = append(params, "--log-disable")
@@ -155,7 +156,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
         params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU))
     }
 
-    if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
+    if envconfig.Debug {
         params = append(params, "--verbose")
     }
 
@@ -193,16 +194,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
         params = append(params, "--numa")
     }
 
-    // "--cont-batching", // TODO - doesn't seem to have any noticeable perf change for multiple requests
-    numParallel := 1
-    if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
-        numParallel, err = strconv.Atoi(onp)
-        if err != nil || numParallel <= 0 {
-            err = fmt.Errorf("invalid OLLAMA_NUM_PARALLEL=%s must be greater than zero - %w", onp, err)
-            slog.Error("misconfiguration", "error", err)
-            return nil, err
-        }
+    numParallel := envconfig.NumParallel
+    // TODO (jmorganca): multimodal models don't support parallel yet
+    // see https://github.com/ollama/ollama/issues/4165
+    if len(projectors) > 0 {
+        numParallel = 1
+        slog.Warn("multimodal models don't support parallel requests yet")
     }
 
     params = append(params, "--parallel", fmt.Sprintf("%d", numParallel))
 
     for i := 0; i < len(servers); i++ {
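To make the new parallel-slot handling concrete, here is a small sketch of how a configured slot count could be turned into the llama.cpp --parallel flag and forced back to 1 when projector files are present. The helper name is hypothetical; the logic mirrors the hunk above.

package main

import (
    "fmt"
    "log/slog"
)

// parallelFlag starts from the configured slot count and falls back to a
// single slot when projector (multimodal) files are attached to the model.
func parallelFlag(configured int, projectors []string) []string {
    numParallel := configured
    if len(projectors) > 0 {
        numParallel = 1
        slog.Warn("multimodal models don't support parallel requests yet")
    }
    return []string{"--parallel", fmt.Sprintf("%d", numParallel)}
}

func main() {
    fmt.Println(parallelFlag(4, nil))                   // [--parallel 4]
    fmt.Println(parallelFlag(4, []string{"proj.gguf"})) // [--parallel 1]
}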
server/envconfig/config.go (new file, 174 lines)

@@ -0,0 +1,174 @@
package envconfig

import (
    "fmt"
    "log/slog"
    "os"
    "path/filepath"
    "runtime"
    "strconv"
    "strings"
)

var (
    // Set via OLLAMA_ORIGINS in the environment
    AllowOrigins []string
    // Set via OLLAMA_DEBUG in the environment
    Debug bool
    // Set via OLLAMA_LLM_LIBRARY in the environment
    LLMLibrary string
    // Set via OLLAMA_MAX_LOADED_MODELS in the environment
    MaxRunners int
    // Set via OLLAMA_MAX_QUEUE in the environment
    MaxQueuedRequests int
    // Set via OLLAMA_MAX_VRAM in the environment
    MaxVRAM uint64
    // Set via OLLAMA_NOPRUNE in the environment
    NoPrune bool
    // Set via OLLAMA_NUM_PARALLEL in the environment
    NumParallel int
    // Set via OLLAMA_RUNNERS_DIR in the environment
    RunnersDir string
    // Set via OLLAMA_TMPDIR in the environment
    TmpDir string
)

func AsMap() map[string]string {
    return map[string]string{
        "OLLAMA_ORIGINS":           fmt.Sprintf("%v", AllowOrigins),
        "OLLAMA_DEBUG":             fmt.Sprintf("%v", Debug),
        "OLLAMA_LLM_LIBRARY":       fmt.Sprintf("%v", LLMLibrary),
        "OLLAMA_MAX_LOADED_MODELS": fmt.Sprintf("%v", MaxRunners),
        "OLLAMA_MAX_QUEUE":         fmt.Sprintf("%v", MaxQueuedRequests),
        "OLLAMA_MAX_VRAM":          fmt.Sprintf("%v", MaxVRAM),
        "OLLAMA_NOPRUNE":           fmt.Sprintf("%v", NoPrune),
        "OLLAMA_NUM_PARALLEL":      fmt.Sprintf("%v", NumParallel),
        "OLLAMA_RUNNERS_DIR":       fmt.Sprintf("%v", RunnersDir),
        "OLLAMA_TMPDIR":            fmt.Sprintf("%v", TmpDir),
    }
}

var defaultAllowOrigins = []string{
    "localhost",
    "127.0.0.1",
    "0.0.0.0",
}

// Clean quotes and spaces from the value
func clean(key string) string {
    return strings.Trim(os.Getenv(key), "\"' ")
}

func init() {
    // default values
    NumParallel = 1
    MaxRunners = 1
    MaxQueuedRequests = 512

    LoadConfig()
}

func LoadConfig() {
    if debug := clean("OLLAMA_DEBUG"); debug != "" {
        d, err := strconv.ParseBool(debug)
        if err == nil {
            Debug = d
        } else {
            Debug = true
        }
    }

    RunnersDir = clean("OLLAMA_RUNNERS_DIR")
    if runtime.GOOS == "windows" && RunnersDir == "" {
        // On Windows we do not carry the payloads inside the main executable
        appExe, err := os.Executable()
        if err != nil {
            slog.Error("failed to lookup executable path", "error", err)
        }

        cwd, err := os.Getwd()
        if err != nil {
            slog.Error("failed to lookup working directory", "error", err)
        }

        var paths []string
        for _, root := range []string{filepath.Dir(appExe), cwd} {
            paths = append(paths,
                filepath.Join(root),
                filepath.Join(root, "windows-"+runtime.GOARCH),
                filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
            )
        }

        // Try a few variations to improve developer experience when building from source in the local tree
        for _, p := range paths {
            candidate := filepath.Join(p, "ollama_runners")
            _, err := os.Stat(candidate)
            if err == nil {
                RunnersDir = candidate
                break
            }
        }
        if RunnersDir == "" {
            slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
        }
    }

    TmpDir = clean("OLLAMA_TMPDIR")

    userLimit := clean("OLLAMA_MAX_VRAM")
    if userLimit != "" {
        avail, err := strconv.ParseUint(userLimit, 10, 64)
        if err != nil {
            slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
        } else {
            MaxVRAM = avail
        }
    }

    LLMLibrary = clean("OLLAMA_LLM_LIBRARY")

    if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
        val, err := strconv.Atoi(onp)
        if err != nil || val <= 0 {
            slog.Error("invalid setting must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
        } else {
            NumParallel = val
        }
    }

    if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
        NoPrune = true
    }

    if origins := clean("OLLAMA_ORIGINS"); origins != "" {
        AllowOrigins = strings.Split(origins, ",")
    }
    for _, allowOrigin := range defaultAllowOrigins {
        AllowOrigins = append(AllowOrigins,
            fmt.Sprintf("http://%s", allowOrigin),
            fmt.Sprintf("https://%s", allowOrigin),
            fmt.Sprintf("http://%s:*", allowOrigin),
            fmt.Sprintf("https://%s:*", allowOrigin),
        )
    }

    maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
    if maxRunners != "" {
        m, err := strconv.Atoi(maxRunners)
        if err != nil {
            slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
        } else {
            MaxRunners = m
        }
    }

    if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" {
        p, err := strconv.Atoi(onp)
        if err != nil || p <= 0 {
            slog.Error("invalid setting", "OLLAMA_MAX_QUEUE", onp, "error", err)
        } else {
            MaxQueuedRequests = p
        }
    }
}
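A minimal sketch of how a caller might consume the new package at startup, mirroring the Serve() changes later in this commit. It assumes the github.com/ollama/ollama/server/envconfig import path introduced above; importing the package runs its init(), which loads the environment.

package main

import (
    "log/slog"
    "os"

    "github.com/ollama/ollama/server/envconfig"
)

func main() {
    // Pick the log level from the already-loaded configuration and log the
    // effective settings once, the way the server entry point now does.
    level := slog.LevelInfo
    if envconfig.Debug {
        level = slog.LevelDebug
    }
    slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: level})))
    slog.Info("server config", "env", envconfig.AsMap())
}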
server/envconfig/config_test.go (new file, 20 lines)

@@ -0,0 +1,20 @@
package envconfig

import (
    "os"
    "testing"

    "github.com/stretchr/testify/require"
)

func TestConfig(t *testing.T) {
    os.Setenv("OLLAMA_DEBUG", "")
    LoadConfig()
    require.False(t, Debug)
    os.Setenv("OLLAMA_DEBUG", "false")
    LoadConfig()
    require.False(t, Debug)
    os.Setenv("OLLAMA_DEBUG", "1")
    LoadConfig()
    require.True(t, Debug)
}
server/images.go

@@ -29,7 +29,7 @@ import (
     "github.com/ollama/ollama/convert"
     "github.com/ollama/ollama/format"
     "github.com/ollama/ollama/llm"
-    "github.com/ollama/ollama/parser"
+    "github.com/ollama/ollama/server/envconfig"
     "github.com/ollama/ollama/types/errtypes"
     "github.com/ollama/ollama/types/model"
     "github.com/ollama/ollama/version"
@@ -63,46 +63,74 @@ func (m *Model) IsEmbedding() bool {
     return slices.Contains(m.Config.ModelFamilies, "bert") || slices.Contains(m.Config.ModelFamilies, "nomic-bert")
 }
 
-func (m *Model) Commands() (cmds []parser.Command) {
-    cmds = append(cmds, parser.Command{Name: "model", Args: m.ModelPath})
+func (m *Model) String() string {
+    var modelfile model.File
+
+    modelfile.Commands = append(modelfile.Commands, model.Command{
+        Name: "model",
+        Args: m.ModelPath,
+    })
 
     if m.Template != "" {
-        cmds = append(cmds, parser.Command{Name: "template", Args: m.Template})
+        modelfile.Commands = append(modelfile.Commands, model.Command{
+            Name: "template",
+            Args: m.Template,
+        })
     }
 
     if m.System != "" {
-        cmds = append(cmds, parser.Command{Name: "system", Args: m.System})
+        modelfile.Commands = append(modelfile.Commands, model.Command{
+            Name: "system",
+            Args: m.System,
+        })
     }
 
     for _, adapter := range m.AdapterPaths {
-        cmds = append(cmds, parser.Command{Name: "adapter", Args: adapter})
+        modelfile.Commands = append(modelfile.Commands, model.Command{
+            Name: "adapter",
+            Args: adapter,
+        })
     }
 
     for _, projector := range m.ProjectorPaths {
-        cmds = append(cmds, parser.Command{Name: "projector", Args: projector})
+        modelfile.Commands = append(modelfile.Commands, model.Command{
+            Name: "projector",
+            Args: projector,
+        })
     }
 
     for k, v := range m.Options {
         switch v := v.(type) {
         case []any:
             for _, s := range v {
-                cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", s)})
+                modelfile.Commands = append(modelfile.Commands, model.Command{
+                    Name: k,
+                    Args: fmt.Sprintf("%v", s),
+                })
             }
         default:
-            cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", v)})
+            modelfile.Commands = append(modelfile.Commands, model.Command{
+                Name: k,
+                Args: fmt.Sprintf("%v", v),
+            })
         }
     }
 
     for _, license := range m.License {
-        cmds = append(cmds, parser.Command{Name: "license", Args: license})
+        modelfile.Commands = append(modelfile.Commands, model.Command{
+            Name: "license",
+            Args: license,
+        })
     }
 
     for _, msg := range m.Messages {
-        cmds = append(cmds, parser.Command{Name: "message", Args: fmt.Sprintf("%s %s", msg.Role, msg.Content)})
+        modelfile.Commands = append(modelfile.Commands, model.Command{
+            Name: "message",
+            Args: fmt.Sprintf("%s %s", msg.Role, msg.Content),
+        })
     }
 
-    return cmds
+    return modelfile.String()
 
 }
 
 type Message struct {
@@ -329,7 +357,7 @@ func realpath(mfDir, from string) string {
     return abspath
 }
 
-func CreateModel(ctx context.Context, name, modelFileDir, quantization string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
+func CreateModel(ctx context.Context, name, modelFileDir, quantization string, modelfile *model.File, fn func(resp api.ProgressResponse)) error {
     deleteMap := make(map[string]struct{})
     if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil {
         for _, layer := range append(manifest.Layers, manifest.Config) {
@@ -351,7 +379,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
     params := make(map[string][]string)
     fromParams := make(map[string]any)
 
-    for _, c := range commands {
+    for _, c := range modelfile.Commands {
         mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
 
         switch c.Name {
@@ -668,7 +696,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
         return err
     }
 
-    if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
+    if !envconfig.NoPrune {
         if err := deleteUnusedLayers(nil, deleteMap, false); err != nil {
             return err
         }
@@ -999,7 +1027,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
     // build deleteMap to prune unused layers
     deleteMap := make(map[string]struct{})
 
-    if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
+    if !envconfig.NoPrune {
         manifest, _, err = GetManifest(mp)
         if err != nil && !errors.Is(err, os.ErrNotExist) {
             return err
server/modelpath.go

@@ -6,6 +6,7 @@ import (
     "net/url"
     "os"
     "path/filepath"
+    "regexp"
     "strings"
 )
 
@@ -25,9 +26,10 @@ const (
 )
 
 var (
     ErrInvalidImageFormat = errors.New("invalid image format")
     ErrInvalidProtocol    = errors.New("invalid protocol scheme")
     ErrInsecureProtocol   = errors.New("insecure protocol http")
+    ErrInvalidDigestFormat = errors.New("invalid digest format")
 )
 
 func ParseModelPath(name string) ModelPath {
@@ -149,6 +151,17 @@ func GetBlobsPath(digest string) (string, error) {
         return "", err
     }
 
+    // only accept actual sha256 digests
+    pattern := "^sha256[:-][0-9a-fA-F]{64}$"
+    re := regexp.MustCompile(pattern)
+    if err != nil {
+        return "", err
+    }
+
+    if digest != "" && !re.MatchString(digest) {
+        return "", ErrInvalidDigestFormat
+    }
+
     digest = strings.ReplaceAll(digest, ":", "-")
     path := filepath.Join(dir, "blobs", digest)
     dirPath := filepath.Dir(path)
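The digest check above can be exercised on its own. A small sketch using the same sha256 pattern; the error value and helper name are stand-ins so the snippet runs outside the server package:

package main

import (
    "errors"
    "fmt"
    "regexp"
    "strings"
)

var errInvalidDigest = errors.New("invalid digest format")

// digestPattern matches "sha256:<64 hex chars>" or "sha256-<64 hex chars>",
// the only forms GetBlobsPath now accepts before mapping ":" to "-" on disk.
var digestPattern = regexp.MustCompile("^sha256[:-][0-9a-fA-F]{64}$")

func blobName(digest string) (string, error) {
    if digest != "" && !digestPattern.MatchString(digest) {
        return "", errInvalidDigest
    }
    return strings.ReplaceAll(digest, ":", "-"), nil
}

func main() {
    fmt.Println(blobName("sha256:" + strings.Repeat("a", 64))) // sha256-aaaa... <nil>
    _, err := blobName("../sha256-escape")
    fmt.Println(err) // invalid digest format
}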
server/modelpath_test.go

@@ -1,6 +1,73 @@
 package server
 
-import "testing"
+import (
+    "os"
+    "path/filepath"
+    "testing"
+
+    "github.com/stretchr/testify/assert"
+)
+
+func TestGetBlobsPath(t *testing.T) {
+    // GetBlobsPath expects an actual directory to exist
+    dir, err := os.MkdirTemp("", "ollama-test")
+    assert.Nil(t, err)
+    defer os.RemoveAll(dir)
+
+    tests := []struct {
+        name     string
+        digest   string
+        expected string
+        err      error
+    }{
+        {
+            "empty digest",
+            "",
+            filepath.Join(dir, "blobs"),
+            nil,
+        },
+        {
+            "valid with colon",
+            "sha256:456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9",
+            filepath.Join(dir, "blobs", "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9"),
+            nil,
+        },
+        {
+            "valid with dash",
+            "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9",
+            filepath.Join(dir, "blobs", "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9"),
+            nil,
+        },
+        {
+            "digest too short",
+            "sha256-45640291",
+            "",
+            ErrInvalidDigestFormat,
+        },
+        {
+            "digest too long",
+            "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9aaaaaaaaaa",
+            "",
+            ErrInvalidDigestFormat,
+        },
+        {
+            "digest invalid chars",
+            "../sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7a",
+            "",
+            ErrInvalidDigestFormat,
+        },
+    }
+    for _, tc := range tests {
+        t.Run(tc.name, func(t *testing.T) {
+            t.Setenv("OLLAMA_MODELS", dir)
+
+            got, err := GetBlobsPath(tc.digest)
+
+            assert.ErrorIs(t, tc.err, err, tc.name)
+            assert.Equal(t, tc.expected, got, tc.name)
+        })
+    }
+}
+
 func TestParseModelPath(t *testing.T) {
     tests := []struct {
server/routes.go

@@ -1,6 +1,7 @@
 package server
 
 import (
+    "cmp"
     "context"
     "encoding/json"
     "errors"
@@ -28,7 +29,7 @@ import (
     "github.com/ollama/ollama/gpu"
     "github.com/ollama/ollama/llm"
     "github.com/ollama/ollama/openai"
-    "github.com/ollama/ollama/parser"
+    "github.com/ollama/ollama/server/envconfig"
     "github.com/ollama/ollama/types/model"
     "github.com/ollama/ollama/version"
 )
@@ -146,12 +147,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
     select {
     case runner = <-rCh:
     case err = <-eCh:
-        if errors.Is(err, context.Canceled) {
-            c.JSON(499, gin.H{"error": "request canceled"})
-            return
-        }
-
-        c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+        handleErrorResponse(c, err)
         return
     }
 
@@ -394,12 +390,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
     select {
     case runner = <-rCh:
     case err = <-eCh:
-        if errors.Is(err, context.Canceled) {
-            c.JSON(499, gin.H{"error": "request canceled"})
-            return
-        }
-
-        c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+        handleErrorResponse(c, err)
         return
     }
 
@@ -522,28 +513,17 @@ func (s *Server) PushModelHandler(c *gin.Context) {
 
 func (s *Server) CreateModelHandler(c *gin.Context) {
     var req api.CreateRequest
-    err := c.ShouldBindJSON(&req)
-    switch {
-    case errors.Is(err, io.EOF):
+    if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
         c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
         return
-    case err != nil:
+    } else if err != nil {
         c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
         return
     }
 
-    var model string
-    if req.Model != "" {
-        model = req.Model
-    } else if req.Name != "" {
-        model = req.Name
-    } else {
-        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
-        return
-    }
-
-    if err := ParseModelPath(model).Validate(); err != nil {
-        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+    name := model.ParseName(cmp.Or(req.Model, req.Name))
+    if !name.IsValid() {
+        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
         return
     }
 
@@ -552,19 +532,19 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
         return
     }
 
-    var modelfile io.Reader = strings.NewReader(req.Modelfile)
+    var r io.Reader = strings.NewReader(req.Modelfile)
     if req.Path != "" && req.Modelfile == "" {
-        mf, err := os.Open(req.Path)
+        f, err := os.Open(req.Path)
         if err != nil {
            c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
            return
        }
-        defer mf.Close()
+        defer f.Close()
 
-        modelfile = mf
+        r = f
     }
 
-    commands, err := parser.Parse(modelfile)
+    modelfile, err := model.ParseFile(r)
     if err != nil {
         c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
         return
@@ -580,7 +560,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
     ctx, cancel := context.WithCancel(c.Request.Context())
     defer cancel()
 
-    if err := CreateModel(ctx, model, filepath.Dir(req.Path), req.Quantization, commands, fn); err != nil {
+    if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), req.Quantization, modelfile, fn); err != nil {
         ch <- gin.H{"error": err.Error()}
     }
 }()
@@ -732,7 +712,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
     fmt.Fprintln(&sb, "# Modelfile generate by \"ollama show\"")
     fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
     fmt.Fprintf(&sb, "# FROM %s\n\n", model.ShortName)
-    fmt.Fprint(&sb, parser.Format(model.Commands()))
+    fmt.Fprint(&sb, model.String())
     resp.Modelfile = sb.String()
 
     return resp, nil
@@ -880,12 +860,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
     c.Status(http.StatusCreated)
 }
 
-var defaultAllowOrigins = []string{
-    "localhost",
-    "127.0.0.1",
-    "0.0.0.0",
-}
-
 func isLocalIP(ip netip.Addr) bool {
     if interfaces, err := net.Interfaces(); err == nil {
         for _, iface := range interfaces {
@@ -969,19 +943,7 @@ func (s *Server) GenerateRoutes() http.Handler {
     config := cors.DefaultConfig()
     config.AllowWildcard = true
     config.AllowBrowserExtensions = true
+    config.AllowOrigins = envconfig.AllowOrigins
-    if allowedOrigins := strings.Trim(os.Getenv("OLLAMA_ORIGINS"), "\"'"); allowedOrigins != "" {
-        config.AllowOrigins = strings.Split(allowedOrigins, ",")
-    }
-
-    for _, allowOrigin := range defaultAllowOrigins {
-        config.AllowOrigins = append(config.AllowOrigins,
-            fmt.Sprintf("http://%s", allowOrigin),
-            fmt.Sprintf("https://%s", allowOrigin),
-            fmt.Sprintf("http://%s:*", allowOrigin),
-            fmt.Sprintf("https://%s:*", allowOrigin),
-        )
-    }
 
     r := gin.Default()
     r.Use(
@@ -1020,10 +982,11 @@ func (s *Server) GenerateRoutes() http.Handler {
 
 func Serve(ln net.Listener) error {
     level := slog.LevelInfo
-    if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
+    if envconfig.Debug {
         level = slog.LevelDebug
     }
 
+    slog.Info("server config", "env", envconfig.AsMap())
     handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
         Level:     level,
         AddSource: true,
@@ -1047,7 +1010,7 @@ func Serve(ln net.Listener) error {
         return err
     }
 
-    if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
+    if !envconfig.NoPrune {
         // clean up unused layers and manifests
         if err := PruneLayers(); err != nil {
             return err
@@ -1223,12 +1186,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
     select {
     case runner = <-rCh:
     case err = <-eCh:
-        if errors.Is(err, context.Canceled) {
-            c.JSON(499, gin.H{"error": "request canceled"})
-            return
-        }
-
-        c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+        handleErrorResponse(c, err)
         return
     }
 
@@ -1349,3 +1307,15 @@ func (s *Server) ChatHandler(c *gin.Context) {
 
     streamResponse(c, ch)
 }
+
+func handleErrorResponse(c *gin.Context, err error) {
+    if errors.Is(err, context.Canceled) {
+        c.JSON(499, gin.H{"error": "request canceled"})
+        return
+    }
+    if errors.Is(err, ErrMaxQueue) {
+        c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
+        return
+    }
+    c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+}
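A sketch of the error-to-status mapping that handleErrorResponse centralizes for the generate, embeddings, and chat handlers. The sentinel below is a stand-in for the scheduler's ErrMaxQueue so the snippet runs outside the server package:

package main

import (
    "context"
    "errors"
    "fmt"
    "net/http"
)

// errMaxQueue stands in for the scheduler's ErrMaxQueue sentinel.
var errMaxQueue = errors.New("server busy, please try again. maximum pending requests exceeded")

// statusFor mirrors handleErrorResponse: canceled requests map to the
// non-standard 499 code, a full queue maps to 503, everything else to 500.
func statusFor(err error) int {
    switch {
    case errors.Is(err, context.Canceled):
        return 499
    case errors.Is(err, errMaxQueue):
        return http.StatusServiceUnavailable
    default:
        return http.StatusInternalServerError
    }
}

func main() {
    fmt.Println(statusFor(context.Canceled))   // 499
    fmt.Println(statusFor(errMaxQueue))        // 503
    fmt.Println(statusFor(errors.New("boom"))) // 500
}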
server/routes_test.go

@@ -17,7 +17,7 @@ import (
     "github.com/stretchr/testify/assert"
 
     "github.com/ollama/ollama/api"
-    "github.com/ollama/ollama/parser"
+    "github.com/ollama/ollama/types/model"
     "github.com/ollama/ollama/version"
 )
 
@@ -55,13 +55,13 @@ func Test_Routes(t *testing.T) {
     createTestModel := func(t *testing.T, name string) {
         fname := createTestFile(t, "ollama-model")
 
-        modelfile := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname))
-        commands, err := parser.Parse(modelfile)
+        r := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname))
+        modelfile, err := model.ParseFile(r)
         assert.Nil(t, err)
         fn := func(resp api.ProgressResponse) {
             t.Logf("Status: %s", resp.Status)
         }
-        err = CreateModel(context.TODO(), name, "", "", commands, fn)
+        err = CreateModel(context.TODO(), name, "", "", modelfile, fn)
         assert.Nil(t, err)
     }
 
server/sched.go

@@ -5,10 +5,8 @@ import (
     "errors"
     "fmt"
     "log/slog"
-    "os"
     "reflect"
     "sort"
-    "strconv"
     "strings"
     "sync"
     "time"
@@ -17,6 +15,7 @@ import (
     "github.com/ollama/ollama/format"
     "github.com/ollama/ollama/gpu"
     "github.com/ollama/ollama/llm"
+    "github.com/ollama/ollama/server/envconfig"
     "golang.org/x/exp/slices"
 )
 
@@ -43,35 +42,14 @@ type Scheduler struct {
     getGpuFn func() gpu.GpuInfoList
 }
 
-// TODO set this to zero after a release or two, to enable multiple models by default
-var loadedMax = 1 // Maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners)
-var maxQueuedRequests = 10 // TODO configurable
-var numParallel = 1
+var ErrMaxQueue = fmt.Errorf("server busy, please try again. maximum pending requests exceeded")
 
 func InitScheduler(ctx context.Context) *Scheduler {
-    maxRunners := os.Getenv("OLLAMA_MAX_LOADED_MODELS")
-    if maxRunners != "" {
-        m, err := strconv.Atoi(maxRunners)
-        if err != nil {
-            slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
-        } else {
-            loadedMax = m
-        }
-    }
-    if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
-        p, err := strconv.Atoi(onp)
-        if err != nil || p <= 0 {
-            slog.Error("invalid parallel setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
-        } else {
-            numParallel = p
-        }
-    }
-
     sched := &Scheduler{
-        pendingReqCh:  make(chan *LlmRequest, maxQueuedRequests),
-        finishedReqCh: make(chan *LlmRequest, maxQueuedRequests),
-        expiredCh:     make(chan *runnerRef, maxQueuedRequests),
-        unloadedCh:    make(chan interface{}, maxQueuedRequests),
+        pendingReqCh:  make(chan *LlmRequest, envconfig.MaxQueuedRequests),
+        finishedReqCh: make(chan *LlmRequest, envconfig.MaxQueuedRequests),
+        expiredCh:     make(chan *runnerRef, envconfig.MaxQueuedRequests),
+        unloadedCh:    make(chan interface{}, envconfig.MaxQueuedRequests),
         loaded:        make(map[string]*runnerRef),
         newServerFn:   llm.NewLlamaServer,
         getGpuFn:      gpu.GetGPUInfo,
@@ -82,6 +60,9 @@ func InitScheduler(ctx context.Context) *Scheduler {
 
 // context must be canceled to decrement ref count and release the runner
 func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) {
+    // allocate a large enough kv cache for all parallel requests
+    opts.NumCtx = opts.NumCtx * envconfig.NumParallel
+
     req := &LlmRequest{
         ctx:   c,
         model: model,
@@ -90,12 +71,11 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
         successCh: make(chan *runnerRef),
         errCh:     make(chan error, 1),
     }
-    // context split across parallel threads
-    opts.NumCtx = opts.NumCtx * numParallel
+
     select {
     case s.pendingReqCh <- req:
     default:
-        req.errCh <- fmt.Errorf("server busy, please try again. maximum pending requests exceeded")
+        req.errCh <- ErrMaxQueue
     }
     return req.successCh, req.errCh
 }
@@ -134,11 +114,11 @@ func (s *Scheduler) processPending(ctx context.Context) {
                 pending.useLoadedRunner(runner, s.finishedReqCh)
                 break
             }
-        } else if loadedMax > 0 && loadedCount >= loadedMax {
+        } else if envconfig.MaxRunners > 0 && loadedCount >= envconfig.MaxRunners {
             slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
-            runnerToExpire = s.findRunnerToUnload(pending)
+            runnerToExpire = s.findRunnerToUnload()
         } else {
-            // Either no models are loaded or below loadedMax
+            // Either no models are loaded or below envconfig.MaxRunners
             // Get a refreshed GPU list
             gpus := s.getGpuFn()
 
@@ -149,7 +129,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
                 break
             }
 
-            // If we're CPU only mode, just limit by loadedMax above
+            // If we're CPU only mode, just limit by envconfig.MaxRunners above
             // TODO handle system memory exhaustion
             if (len(gpus) == 1 && gpus[0].Library == "cpu") || pending.opts.NumGPU == 0 {
                 slog.Debug("cpu mode with existing models, loading")
@@ -177,7 +157,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
                 s.loadFn(pending, ggml, gpus)
                 break
             }
-            runnerToExpire = s.findRunnerToUnload(pending)
+            runnerToExpire = s.findRunnerToUnload()
         }
 
         if runnerToExpire == nil {
@@ -277,9 +257,9 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
                 continue
             }
 
+            s.loadedMu.Lock()
             slog.Debug("got lock to unload", "model", runner.model)
             runner.unload()
-            s.loadedMu.Lock()
             delete(s.loaded, runner.model)
             s.loadedMu.Unlock()
             slog.Debug("runner released", "model", runner.model)
@@ -524,7 +504,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
 }
 
 // findRunnerToUnload finds a runner to unload to make room for a new model
-func (s *Scheduler) findRunnerToUnload(req *LlmRequest) *runnerRef {
+func (s *Scheduler) findRunnerToUnload() *runnerRef {
     s.loadedMu.Lock()
     runnerList := make([]*runnerRef, 0, len(s.loaded))
     for _, r := range s.loaded {
|
|||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/gpu"
|
"github.com/ollama/ollama/gpu"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
|
"github.com/ollama/ollama/server/envconfig"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
@@ -27,38 +28,14 @@ func init() {
|
|||||||
func TestInitScheduler(t *testing.T) {
|
func TestInitScheduler(t *testing.T) {
|
||||||
ctx, done := context.WithCancel(context.Background())
|
ctx, done := context.WithCancel(context.Background())
|
||||||
defer done()
|
defer done()
|
||||||
initialMax := loadedMax
|
|
||||||
initialParallel := numParallel
|
|
||||||
s := InitScheduler(ctx)
|
s := InitScheduler(ctx)
|
||||||
require.Equal(t, initialMax, loadedMax)
|
|
||||||
s.loadedMu.Lock()
|
s.loadedMu.Lock()
|
||||||
require.NotNil(t, s.loaded)
|
require.NotNil(t, s.loaded)
|
||||||
s.loadedMu.Unlock()
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
os.Setenv("OLLAMA_MAX_LOADED_MODELS", "blue")
|
|
||||||
s = InitScheduler(ctx)
|
|
||||||
require.Equal(t, initialMax, loadedMax)
|
|
||||||
s.loadedMu.Lock()
|
|
||||||
require.NotNil(t, s.loaded)
|
|
||||||
s.loadedMu.Unlock()
|
|
||||||
|
|
||||||
os.Setenv("OLLAMA_MAX_LOADED_MODELS", "0")
|
|
||||||
s = InitScheduler(ctx)
|
|
||||||
require.Equal(t, 0, loadedMax)
|
|
||||||
s.loadedMu.Lock()
|
|
||||||
require.NotNil(t, s.loaded)
|
|
||||||
s.loadedMu.Unlock()
|
|
||||||
|
|
||||||
os.Setenv("OLLAMA_NUM_PARALLEL", "blue")
|
|
||||||
_ = InitScheduler(ctx)
|
|
||||||
require.Equal(t, initialParallel, numParallel)
|
|
||||||
os.Setenv("OLLAMA_NUM_PARALLEL", "10")
|
|
||||||
_ = InitScheduler(ctx)
|
|
||||||
require.Equal(t, 10, numParallel)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLoad(t *testing.T) {
|
func TestLoad(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
s := InitScheduler(ctx)
|
s := InitScheduler(ctx)
|
||||||
var ggml *llm.GGML // value not used in tests
|
var ggml *llm.GGML // value not used in tests
|
||||||
@@ -174,7 +151,7 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestRequests(t *testing.T) {
|
func TestRequests(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
|
|
||||||
// Same model, same request
|
// Same model, same request
|
||||||
@@ -249,7 +226,7 @@ func TestRequests(t *testing.T) {
|
|||||||
t.Errorf("timeout")
|
t.Errorf("timeout")
|
||||||
}
|
}
|
||||||
|
|
||||||
loadedMax = 1
|
envconfig.MaxRunners = 1
|
||||||
s.newServerFn = scenario3a.newServer
|
s.newServerFn = scenario3a.newServer
|
||||||
slog.Info("scenario3a")
|
slog.Info("scenario3a")
|
||||||
s.pendingReqCh <- scenario3a.req
|
s.pendingReqCh <- scenario3a.req
|
||||||
@@ -268,7 +245,7 @@ func TestRequests(t *testing.T) {
|
|||||||
require.Len(t, s.loaded, 1)
|
require.Len(t, s.loaded, 1)
|
||||||
s.loadedMu.Unlock()
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
loadedMax = 0
|
envconfig.MaxRunners = 0
|
||||||
s.newServerFn = scenario3b.newServer
|
s.newServerFn = scenario3b.newServer
|
||||||
slog.Info("scenario3b")
|
slog.Info("scenario3b")
|
||||||
s.pendingReqCh <- scenario3b.req
|
s.pendingReqCh <- scenario3b.req
|
||||||
@@ -329,7 +306,7 @@ func TestRequests(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestGetRunner(t *testing.T) {
|
func TestGetRunner(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
|
|
||||||
// Same model, same request
|
// Same model, same request
|
||||||
@@ -339,7 +316,7 @@ func TestGetRunner(t *testing.T) {
|
|||||||
scenario1b.req.sessionDuration = 0
|
scenario1b.req.sessionDuration = 0
|
||||||
scenario1c := newScenario(t, ctx, "ollama-model-1c", 10)
|
scenario1c := newScenario(t, ctx, "ollama-model-1c", 10)
|
||||||
scenario1c.req.sessionDuration = 0
|
scenario1c.req.sessionDuration = 0
|
||||||
maxQueuedRequests = 1
|
envconfig.MaxQueuedRequests = 1
|
||||||
s := InitScheduler(ctx)
|
s := InitScheduler(ctx)
|
||||||
s.getGpuFn = func() gpu.GpuInfoList {
|
s.getGpuFn = func() gpu.GpuInfoList {
|
||||||
g := gpu.GpuInfo{Library: "metal"}
|
g := gpu.GpuInfo{Library: "metal"}
|
||||||
@@ -391,7 +368,7 @@ func TestGetRunner(t *testing.T) {
|
|||||||
|
|
||||||
// TODO - add one scenario that triggers the bogus finished event with positive ref count
|
// TODO - add one scenario that triggers the bogus finished event with positive ref count
|
||||||
func TestPrematureExpired(t *testing.T) {
|
func TestPrematureExpired(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
|
|
||||||
// Same model, same request
|
// Same model, same request
|
||||||
@@ -436,7 +413,7 @@ func TestPrematureExpired(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestUseLoadedRunner(t *testing.T) {
|
func TestUseLoadedRunner(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||||
req := &LlmRequest{
|
req := &LlmRequest{
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
opts: api.DefaultOptions(),
|
opts: api.DefaultOptions(),
|
||||||
@@ -461,7 +438,7 @@ func TestUseLoadedRunner(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestUpdateFreeSpace(t *testing.T) {
|
func TestUpdateFreeSpace(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
gpus := gpu.GpuInfoList{
|
gpus := gpu.GpuInfoList{
|
||||||
{
|
{
|
||||||
@@ -494,12 +471,9 @@ func TestUpdateFreeSpace(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestFindRunnerToUnload(t *testing.T) {
|
func TestFindRunnerToUnload(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
req := &LlmRequest{
|
|
||||||
ctx: ctx,
|
|
||||||
opts: api.DefaultOptions(),
|
|
||||||
}
|
|
||||||
r1 := &runnerRef{refCount: 1, sessionDuration: 1}
|
r1 := &runnerRef{refCount: 1, sessionDuration: 1}
|
||||||
r2 := &runnerRef{sessionDuration: 2}
|
r2 := &runnerRef{sessionDuration: 2}
|
||||||
|
|
||||||
@@ -509,16 +483,16 @@ func TestFindRunnerToUnload(t *testing.T) {
|
|||||||
s.loaded["b"] = r2
|
s.loaded["b"] = r2
|
||||||
s.loadedMu.Unlock()
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
resp := s.findRunnerToUnload(req)
|
resp := s.findRunnerToUnload()
|
||||||
require.Equal(t, r2, resp)
|
require.Equal(t, r2, resp)
|
||||||
r2.refCount = 1
|
r2.refCount = 1
|
||||||
resp = s.findRunnerToUnload(req)
|
resp = s.findRunnerToUnload()
|
||||||
require.Equal(t, r1, resp)
|
require.Equal(t, r1, resp)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNeedsReload(t *testing.T) {
|
func TestNeedsReload(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
|
|
||||||
llm := &mockLlm{}
|
llm := &mockLlm{}
|
||||||
@@ -562,7 +536,7 @@ func TestNeedsReload(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestUnloadAllRunners(t *testing.T) {
|
func TestUnloadAllRunners(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
|
|
||||||
llm1 := &mockLlm{}
|
llm1 := &mockLlm{}
|
||||||
|
|||||||
types/model (Modelfile parser, moved from the parser package)

@@ -1,4 +1,4 @@
-package parser
+package model
 
 import (
     "bufio"
@@ -10,11 +10,41 @@ import (
     "strings"
 )
 
+type File struct {
+    Commands []Command
+}
+
+func (f File) String() string {
+    var sb strings.Builder
+    for _, cmd := range f.Commands {
+        fmt.Fprintln(&sb, cmd.String())
+    }
+
+    return sb.String()
+}
+
 type Command struct {
     Name string
     Args string
 }
 
+func (c Command) String() string {
+    var sb strings.Builder
+    switch c.Name {
+    case "model":
+        fmt.Fprintf(&sb, "FROM %s", c.Args)
+    case "license", "template", "system", "adapter":
+        fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
+    case "message":
+        role, message, _ := strings.Cut(c.Args, ": ")
+        fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
+    default:
+        fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
+    }
+
+    return sb.String()
+}
+
 type state int
 
 const (
@@ -32,38 +62,14 @@ var (
     errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
 )
 
-func Format(cmds []Command) string {
-    var sb strings.Builder
-    for _, cmd := range cmds {
-        name := cmd.Name
-        args := cmd.Args
-
-        switch cmd.Name {
-        case "model":
-            name = "from"
-            args = cmd.Args
-        case "license", "template", "system", "adapter":
-            args = quote(args)
-        case "message":
-            role, message, _ := strings.Cut(cmd.Args, ": ")
-            args = role + " " + quote(message)
-        default:
-            name = "parameter"
-            args = cmd.Name + " " + quote(cmd.Args)
-        }
-
-        fmt.Fprintln(&sb, strings.ToUpper(name), args)
-    }
-
-    return sb.String()
-}
-
-func Parse(r io.Reader) (cmds []Command, err error) {
+func ParseFile(r io.Reader) (*File, error) {
     var cmd Command
     var curr state
     var b bytes.Buffer
     var role string
 
+    var f File
+
     br := bufio.NewReader(r)
     for {
         r, _, err := br.ReadRune()
@@ -128,7 +134,7 @@ func Parse(r io.Reader) (cmds []Command, err error) {
             }
 
             cmd.Args = s
-            cmds = append(cmds, cmd)
+            f.Commands = append(f.Commands, cmd)
         }
 
         b.Reset()
@@ -157,14 +163,14 @@ func Parse(r io.Reader) (cmds []Command, err error) {
         }
 
         cmd.Args = s
-        cmds = append(cmds, cmd)
+        f.Commands = append(f.Commands, cmd)
     default:
         return nil, io.ErrUnexpectedEOF
     }
 
-    for _, cmd := range cmds {
+    for _, cmd := range f.Commands {
         if cmd.Name == "model" {
-            return cmds, nil
+            return &f, nil
         }
    }

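A small round-trip sketch of the new API: ParseFile builds a File from Modelfile text and String() renders it back as Modelfile syntax, which is what GetModelInfo now uses instead of parser.Format. It assumes the github.com/ollama/ollama/types/model import path used by the tests in this commit; the example Modelfile content is illustrative.

package main

import (
    "fmt"
    "strings"

    "github.com/ollama/ollama/types/model"
)

func main() {
    in := "FROM llama2\nPARAMETER temperature 0.7\nSYSTEM You are a helpful assistant.\n"

    // Parse the Modelfile into commands, then render it back out.
    f, err := model.ParseFile(strings.NewReader(in))
    if err != nil {
        panic(err)
    }
    fmt.Print(f.String())
}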
types/model parser tests (moved from the parser package)

@@ -1,4 +1,4 @@
-package parser
+package model
 
 import (
     "bytes"
@@ -10,7 +10,7 @@ import (
     "github.com/stretchr/testify/assert"
 )
 
-func TestParser(t *testing.T) {
+func TestParseFileFile(t *testing.T) {
     input := `
FROM model1
ADAPTER adapter1
@@ -22,8 +22,8 @@ TEMPLATE template1
 
     reader := strings.NewReader(input)
 
-    commands, err := Parse(reader)
-    assert.Nil(t, err)
+    modelfile, err := ParseFile(reader)
+    assert.NoError(t, err)
 
     expectedCommands := []Command{
         {Name: "model", Args: "model1"},
@@ -34,10 +34,10 @@ TEMPLATE template1
         {Name: "template", Args: "template1"},
     }
 
-    assert.Equal(t, expectedCommands, commands)
+    assert.Equal(t, expectedCommands, modelfile.Commands)
 }
 
-func TestParserFrom(t *testing.T) {
+func TestParseFileFrom(t *testing.T) {
     var cases = []struct {
         input    string
         expected []Command
@@ -85,14 +85,16 @@ func TestParserFrom(t *testing.T) {
 
     for _, c := range cases {
         t.Run("", func(t *testing.T) {
-            commands, err := Parse(strings.NewReader(c.input))
+            modelfile, err := ParseFile(strings.NewReader(c.input))
             assert.ErrorIs(t, err, c.err)
-            assert.Equal(t, c.expected, commands)
+            if modelfile != nil {
+                assert.Equal(t, c.expected, modelfile.Commands)
+            }
         })
     }
 }
 
-func TestParserParametersMissingValue(t *testing.T) {
+func TestParseFileParametersMissingValue(t *testing.T) {
     input := `
FROM foo
PARAMETER param1
@@ -100,21 +102,21 @@ PARAMETER param1
 
     reader := strings.NewReader(input)
 
-    _, err := Parse(reader)
+    _, err := ParseFile(reader)
     assert.ErrorIs(t, err, io.ErrUnexpectedEOF)
 }
 
-func TestParserBadCommand(t *testing.T) {
+func TestParseFileBadCommand(t *testing.T) {
     input := `
FROM foo
BADCOMMAND param1 value1
`
-    _, err := Parse(strings.NewReader(input))
+    _, err := ParseFile(strings.NewReader(input))
     assert.ErrorIs(t, err, errInvalidCommand)
 
 }
 
-func TestParserMessages(t *testing.T) {
+func TestParseFileMessages(t *testing.T) {
     var cases = []struct {
         input    string
         expected []Command
@@ -123,34 +125,34 @@ func TestParserMessages(t *testing.T) {
         {
             `
FROM foo
-MESSAGE system You are a Parser. Always Parse things.
+MESSAGE system You are a file parser. Always parse things.
`,
             []Command{
                 {Name: "model", Args: "foo"},
-                {Name: "message", Args: "system: You are a Parser. Always Parse things."},
+                {Name: "message", Args: "system: You are a file parser. Always parse things."},
             },
             nil,
         },
         {
             `
FROM foo
-MESSAGE system You are a Parser. Always Parse things.`,
+MESSAGE system You are a file parser. Always parse things.`,
             []Command{
                 {Name: "model", Args: "foo"},
-                {Name: "message", Args: "system: You are a Parser. Always Parse things."},
+                {Name: "message", Args: "system: You are a file parser. Always parse things."},
             },
             nil,
         },
         {
             `
FROM foo
-MESSAGE system You are a Parser. Always Parse things.
+MESSAGE system You are a file parser. Always parse things.
MESSAGE user Hey there!
MESSAGE assistant Hello, I want to parse all the things!
`,
             []Command{
                 {Name: "model", Args: "foo"},
-                {Name: "message", Args: "system: You are a Parser. Always Parse things."},
+                {Name: "message", Args: "system: You are a file parser. Always parse things."},
                 {Name: "message", Args: "user: Hey there!"},
                 {Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
             },
@@ -160,12 +162,12 @@ MESSAGE assistant Hello, I want to parse all the things!
             `
FROM foo
MESSAGE system """
-You are a multiline Parser. Always Parse things.
+You are a multiline file parser. Always parse things.
"""
`,
             []Command{
                 {Name: "model", Args: "foo"},
-                {Name: "message", Args: "system: \nYou are a multiline Parser. Always Parse things.\n"},
+                {Name: "message", Args: "system: \nYou are a multiline file parser. Always parse things.\n"},
             },
             nil,
         },
@@ -196,14 +198,16 @@ MESSAGE system`,
 
     for _, c := range cases {
         t.Run("", func(t *testing.T) {
-            commands, err := Parse(strings.NewReader(c.input))
+            modelfile, err := ParseFile(strings.NewReader(c.input))
             assert.ErrorIs(t, err, c.err)
-            assert.Equal(t, c.expected, commands)
+            if modelfile != nil {
+                assert.Equal(t, c.expected, modelfile.Commands)
+            }
         })
     }
 }
 
-func TestParserQuoted(t *testing.T) {
+func TestParseFileQuoted(t *testing.T) {
     var cases = []struct {
         multiline string
         expected  []Command
@@ -348,14 +352,16 @@ TEMPLATE """
 
     for _, c := range cases {
         t.Run("", func(t *testing.T) {
-            commands, err := Parse(strings.NewReader(c.multiline))
+            modelfile, err := ParseFile(strings.NewReader(c.multiline))
|
||||||
assert.ErrorIs(t, err, c.err)
|
assert.ErrorIs(t, err, c.err)
|
||||||
assert.Equal(t, c.expected, commands)
|
if modelfile != nil {
|
||||||
|
assert.Equal(t, c.expected, modelfile.Commands)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParserParameters(t *testing.T) {
|
func TestParseFileParameters(t *testing.T) {
|
||||||
var cases = map[string]struct {
|
var cases = map[string]struct {
|
||||||
name, value string
|
name, value string
|
||||||
}{
|
}{
|
||||||
@@ -404,18 +410,18 @@ func TestParserParameters(t *testing.T) {
|
|||||||
var b bytes.Buffer
|
var b bytes.Buffer
|
||||||
fmt.Fprintln(&b, "FROM foo")
|
fmt.Fprintln(&b, "FROM foo")
|
||||||
fmt.Fprintln(&b, "PARAMETER", k)
|
fmt.Fprintln(&b, "PARAMETER", k)
|
||||||
commands, err := Parse(&b)
|
modelfile, err := ParseFile(&b)
|
||||||
assert.Nil(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
assert.Equal(t, []Command{
|
assert.Equal(t, []Command{
|
||||||
{Name: "model", Args: "foo"},
|
{Name: "model", Args: "foo"},
|
||||||
{Name: v.name, Args: v.value},
|
{Name: v.name, Args: v.value},
|
||||||
}, commands)
|
}, modelfile.Commands)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParserComments(t *testing.T) {
|
func TestParseFileComments(t *testing.T) {
|
||||||
var cases = []struct {
|
var cases = []struct {
|
||||||
input string
|
input string
|
||||||
expected []Command
|
expected []Command
|
||||||
@@ -433,14 +439,14 @@ FROM foo
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
t.Run("", func(t *testing.T) {
|
t.Run("", func(t *testing.T) {
|
||||||
commands, err := Parse(strings.NewReader(c.input))
|
modelfile, err := ParseFile(strings.NewReader(c.input))
|
||||||
assert.Nil(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, c.expected, commands)
|
assert.Equal(t, c.expected, modelfile.Commands)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseFormatParse(t *testing.T) {
|
func TestParseFileFormatParseFile(t *testing.T) {
|
||||||
var cases = []string{
|
var cases = []string{
|
||||||
`
|
`
|
||||||
FROM foo
|
FROM foo
|
||||||
@@ -449,7 +455,7 @@ LICENSE MIT
|
|||||||
PARAMETER param1 value1
|
PARAMETER param1 value1
|
||||||
PARAMETER param2 value2
|
PARAMETER param2 value2
|
||||||
TEMPLATE template1
|
TEMPLATE template1
|
||||||
MESSAGE system You are a Parser. Always Parse things.
|
MESSAGE system You are a file parser. Always parse things.
|
||||||
MESSAGE user Hey there!
|
MESSAGE user Hey there!
|
||||||
MESSAGE assistant Hello, I want to parse all the things!
|
MESSAGE assistant Hello, I want to parse all the things!
|
||||||
`,
|
`,
|
||||||
@@ -488,13 +494,13 @@ MESSAGE assistant Hello, I want to parse all the things!
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
t.Run("", func(t *testing.T) {
|
t.Run("", func(t *testing.T) {
|
||||||
commands, err := Parse(strings.NewReader(c))
|
modelfile, err := ParseFile(strings.NewReader(c))
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
commands2, err := Parse(strings.NewReader(Format(commands)))
|
modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
assert.Equal(t, commands, commands2)
|
assert.Equal(t, modelfile, modelfile2)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
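The tests above capture the API change in this commit: Parse used to return the command slice directly, while ParseFile returns a parsed representation whose Commands field holds the directives and whose String() method re-serializes it (see TestParseFileFormatParseFile). A minimal usage sketch follows, written as an in-package test so no import path has to be assumed; only ParseFile, Commands, Name, Args, and String() are taken from the diff, and the input text is illustrative.

package model

import (
	"fmt"
	"strings"
	"testing"
)

// Sketch only: exercises the renamed ParseFile entry point the same way the
// tests above do. ParseFile, Commands, Name, Args, and String() come from
// this diff; the input below is an arbitrary example.
func TestParseFileUsageSketch(t *testing.T) {
	input := "FROM foo\nPARAMETER param1 value1\n"

	modelfile, err := ParseFile(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}

	// Each directive becomes a Command with a Name and Args.
	for _, cmd := range modelfile.Commands {
		fmt.Println(cmd.Name, cmd.Args)
	}

	// Round-tripping through String() should parse back to the same
	// commands, mirroring TestParseFileFormatParseFile.
	if _, err := ParseFile(strings.NewReader(modelfile.String())); err != nil {
		t.Fatal(err)
	}
}

The nil check added to the table-driven tests above exists because ParseFile can return a nil result alongside an error, so Commands is only compared when parsing succeeded.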
@@ -161,7 +161,7 @@ func ParseNameBare(s string) Name {
 	}
 
 	scheme, host, ok := strings.Cut(s, "://")
-	if ! ok {
+	if !ok {
 		host = scheme
 	}
 	n.Host = host
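The hunk above is a gofmt-style cleanup inside ParseNameBare's scheme-stripping step. As a standalone illustration of that strings.Cut idiom (not code from the repository), the third return value reports whether the separator was found; when it is absent, the whole input is carried forward as the host portion:

package main

import (
	"fmt"
	"strings"
)

// stripScheme mirrors the strings.Cut pattern shown in ParseNameBare above:
// drop an optional "scheme://" prefix and keep the remainder.
func stripScheme(s string) string {
	scheme, host, ok := strings.Cut(s, "://")
	if !ok {
		// No "://" present: Cut returns the whole input as the first value.
		host = scheme
	}
	return host
}

func main() {
	fmt.Println(stripScheme("https://registry.example.com/library/llama2")) // registry.example.com/library/llama2
	fmt.Println(stripScheme("library/llama2"))                              // library/llama2
}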