diff --git a/README.md b/README.md index a1a9c288..8d7659c5 100644 --- a/README.md +++ b/README.md @@ -370,12 +370,13 @@ See the [API documentation](./docs/api.md) for all endpoints. - [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram) - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation) - [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama) -- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama) - [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot) - [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support) - [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot) - [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt) - [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama) +- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama) +- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot) - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama) - [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace) - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension) @@ -384,4 +385,5 @@ See the [API documentation](./docs/api.md) for all endpoints. - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation) ### Supported backends -- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov. \ No newline at end of file +- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov. + diff --git a/app/lifecycle/logging.go b/app/lifecycle/logging.go index 98df9b41..4be90648 100644 --- a/app/lifecycle/logging.go +++ b/app/lifecycle/logging.go @@ -5,12 +5,14 @@ import ( "log/slog" "os" "path/filepath" + + "github.com/ollama/ollama/server/envconfig" ) func InitLogging() { level := slog.LevelInfo - if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" { + if envconfig.Debug { level = slog.LevelDebug } diff --git a/app/lifecycle/updater_windows.go b/app/lifecycle/updater_windows.go index f26c43c9..4053671a 100644 --- a/app/lifecycle/updater_windows.go +++ b/app/lifecycle/updater_windows.go @@ -31,16 +31,13 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error { "/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd "/FORCECLOSEAPPLICATIONS", // Force close the tray app - might be needed } - // When we're not in debug mode, make the upgrade as quiet as possible (no GUI, no prompts) - // TODO - temporarily disable since we're pinning in debug mode for the preview - // if debug := os.Getenv("OLLAMA_DEBUG"); debug == "" { + // make the upgrade as quiet as possible (no GUI, no prompts) installArgs = append(installArgs, "/SP", // Skip the "This will install... Do you wish to continue" prompt "/SUPPRESSMSGBOXES", "/SILENT", "/VERYSILENT", ) - // } // Safeguard in case we have requests in flight that need to drain... 
slog.Info("Waiting for server to shutdown") diff --git a/app/tray/wintray/menus.go b/app/tray/wintray/menus.go index 74defa67..9cb3b893 100644 --- a/app/tray/wintray/menus.go +++ b/app/tray/wintray/menus.go @@ -1,71 +1,71 @@ -//go:build windows - -package wintray - -import ( - "fmt" - "log/slog" - "unsafe" - - "golang.org/x/sys/windows" -) - -const ( - updatAvailableMenuID = 1 - updateMenuID = updatAvailableMenuID + 1 - separatorMenuID = updateMenuID + 1 - diagLogsMenuID = separatorMenuID + 1 - diagSeparatorMenuID = diagLogsMenuID + 1 - quitMenuID = diagSeparatorMenuID + 1 -) - -func (t *winTray) initMenus() error { - if err := t.addOrUpdateMenuItem(diagLogsMenuID, 0, diagLogsMenuTitle, false); err != nil { - return fmt.Errorf("unable to create menu entries %w\n", err) - } - if err := t.addSeparatorMenuItem(diagSeparatorMenuID, 0); err != nil { - return fmt.Errorf("unable to create menu entries %w", err) - } - if err := t.addOrUpdateMenuItem(quitMenuID, 0, quitMenuTitle, false); err != nil { - return fmt.Errorf("unable to create menu entries %w\n", err) - } - return nil -} - -func (t *winTray) UpdateAvailable(ver string) error { - if !t.updateNotified { - slog.Debug("updating menu and sending notification for new update") - if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil { - return fmt.Errorf("unable to create menu entries %w", err) - } - if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil { - return fmt.Errorf("unable to create menu entries %w", err) - } - if err := t.addSeparatorMenuItem(separatorMenuID, 0); err != nil { - return fmt.Errorf("unable to create menu entries %w", err) - } - iconFilePath, err := iconBytesToFilePath(wt.updateIcon) - if err != nil { - return fmt.Errorf("unable to write icon data to temp file: %w", err) - } - if err := wt.setIcon(iconFilePath); err != nil { - return fmt.Errorf("unable to set icon: %w", err) - } - t.updateNotified = true - - t.pendingUpdate = true - // Now pop up the notification - t.muNID.Lock() - defer t.muNID.Unlock() - copy(t.nid.InfoTitle[:], windows.StringToUTF16(updateTitle)) - copy(t.nid.Info[:], windows.StringToUTF16(fmt.Sprintf(updateMessage, ver))) - t.nid.Flags |= NIF_INFO - t.nid.Timeout = 10 - t.nid.Size = uint32(unsafe.Sizeof(*wt.nid)) - err = t.nid.modify() - if err != nil { - return err - } - } - return nil -} +//go:build windows + +package wintray + +import ( + "fmt" + "log/slog" + "unsafe" + + "golang.org/x/sys/windows" +) + +const ( + updatAvailableMenuID = 1 + updateMenuID = updatAvailableMenuID + 1 + separatorMenuID = updateMenuID + 1 + diagLogsMenuID = separatorMenuID + 1 + diagSeparatorMenuID = diagLogsMenuID + 1 + quitMenuID = diagSeparatorMenuID + 1 +) + +func (t *winTray) initMenus() error { + if err := t.addOrUpdateMenuItem(diagLogsMenuID, 0, diagLogsMenuTitle, false); err != nil { + return fmt.Errorf("unable to create menu entries %w\n", err) + } + if err := t.addSeparatorMenuItem(diagSeparatorMenuID, 0); err != nil { + return fmt.Errorf("unable to create menu entries %w", err) + } + if err := t.addOrUpdateMenuItem(quitMenuID, 0, quitMenuTitle, false); err != nil { + return fmt.Errorf("unable to create menu entries %w\n", err) + } + return nil +} + +func (t *winTray) UpdateAvailable(ver string) error { + if !t.updateNotified { + slog.Debug("updating menu and sending notification for new update") + if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil { + return fmt.Errorf("unable to 
create menu entries %w", err) + } + if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil { + return fmt.Errorf("unable to create menu entries %w", err) + } + if err := t.addSeparatorMenuItem(separatorMenuID, 0); err != nil { + return fmt.Errorf("unable to create menu entries %w", err) + } + iconFilePath, err := iconBytesToFilePath(wt.updateIcon) + if err != nil { + return fmt.Errorf("unable to write icon data to temp file: %w", err) + } + if err := wt.setIcon(iconFilePath); err != nil { + return fmt.Errorf("unable to set icon: %w", err) + } + t.updateNotified = true + + t.pendingUpdate = true + // Now pop up the notification + t.muNID.Lock() + defer t.muNID.Unlock() + copy(t.nid.InfoTitle[:], windows.StringToUTF16(updateTitle)) + copy(t.nid.Info[:], windows.StringToUTF16(fmt.Sprintf(updateMessage, ver))) + t.nid.Flags |= NIF_INFO + t.nid.Timeout = 10 + t.nid.Size = uint32(unsafe.Sizeof(*wt.nid)) + err = t.nid.modify() + if err != nil { + return err + } + } + return nil +} diff --git a/cmd/cmd.go b/cmd/cmd.go index afae9d90..faac424c 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -34,7 +34,6 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/auth" "github.com/ollama/ollama/format" - "github.com/ollama/ollama/parser" "github.com/ollama/ollama/progress" "github.com/ollama/ollama/server" "github.com/ollama/ollama/types/errtypes" @@ -57,13 +56,13 @@ func CreateHandler(cmd *cobra.Command, args []string) error { p := progress.NewProgress(os.Stderr) defer p.Stop() - modelfile, err := os.Open(filename) + f, err := os.Open(filename) if err != nil { return err } - defer modelfile.Close() + defer f.Close() - commands, err := parser.Parse(modelfile) + modelfile, err := model.ParseFile(f) if err != nil { return err } @@ -77,10 +76,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error { spinner := progress.NewSpinner(status) p.Add(status, spinner) - for i := range commands { - switch commands[i].Name { + for i := range modelfile.Commands { + switch modelfile.Commands[i].Name { case "model", "adapter": - path := commands[i].Args + path := modelfile.Commands[i].Args if path == "~" { path = home } else if strings.HasPrefix(path, "~/") { @@ -92,7 +91,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error { } fi, err := os.Stat(path) - if errors.Is(err, os.ErrNotExist) && commands[i].Name == "model" { + if errors.Is(err, os.ErrNotExist) && modelfile.Commands[i].Name == "model" { continue } else if err != nil { return err @@ -115,7 +114,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error { return err } - commands[i].Args = "@"+digest + modelfile.Commands[i].Args = "@" + digest } } @@ -145,7 +144,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error { quantization, _ := cmd.Flags().GetString("quantization") - request := api.CreateRequest{Name: args[0], Modelfile: parser.Format(commands), Quantization: quantization} + request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantization: quantization} if err := client.Create(cmd.Context(), &request, fn); err != nil { return err } diff --git a/convert/safetensors.go b/convert/safetensors.go index fb8aa019..69424c4d 100644 --- a/convert/safetensors.go +++ b/convert/safetensors.go @@ -53,7 +53,7 @@ func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Ten var err error t, offset, err = m.readTensors(f, offset, params) if err != nil { - slog.Error("%v", err) + slog.Error(err.Error()) return nil, err } tensors = append(tensors, t...) 
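The `slog.Error("%v", err)` calls being replaced in the convert package are a common `log/slog` pitfall: `slog` has no printf-style verbs, so the old calls logged the literal message `%v` and attached the error as a dangling `!BADKEY` attribute. A minimal standalone sketch of the two idiomatic forms, not part of the patch:

```go
package main

import (
	"errors"
	"log/slog"
)

func main() {
	err := errors.New("tensor read failed")

	// What the patch switches to: the error text becomes the log message.
	slog.Error(err.Error())

	// Structured alternative: a constant message plus an "error" key/value pair.
	slog.Error("reading tensors", "error", err)
}
```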
@@ -122,7 +122,7 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ggufName, err := m.GetLayerName(k) if err != nil { - slog.Error("%v", err) + slog.Error(err.Error()) return nil, 0, err } diff --git a/convert/torch.go b/convert/torch.go index fd237505..92c58872 100644 --- a/convert/torch.go +++ b/convert/torch.go @@ -74,7 +74,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, ggufName, err := tf.GetLayerName(k.(string)) if err != nil { - slog.Error("%v", err) + slog.Error(err.Error()) return nil, err } slog.Debug(fmt.Sprintf("finding name for '%s' -> '%s'", k.(string), ggufName)) diff --git a/docs/api.md b/docs/api.md index 5fc946ce..e79b6f5a 100644 --- a/docs/api.md +++ b/docs/api.md @@ -17,7 +17,7 @@ ### Model names -Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version. +Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version. ### Durations @@ -66,7 +66,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama2", + "model": "llama3", "prompt": "Why is the sky blue?" }' ``` @@ -77,7 +77,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama2", + "model": "llama3", "created_at": "2023-08-04T08:52:19.385406455-07:00", "response": "The", "done": false @@ -99,7 +99,7 @@ To calculate how fast the response is generated in tokens per second (token/s), ```json { - "model": "llama2", + "model": "llama3", "created_at": "2023-08-04T19:22:45.499127Z", "response": "", "done": true, @@ -121,7 +121,7 @@ A response can be received in one reply when streaming is off. ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama2", + "model": "llama3", "prompt": "Why is the sky blue?", "stream": false }' @@ -133,7 +133,7 @@ If `stream` is set to `false`, the response will be a single JSON object: ```json { - "model": "llama2", + "model": "llama3", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "done": true, @@ -155,7 +155,7 @@ If `stream` is set to `false`, the response will be a single JSON object: ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama2", + "model": "llama3", "prompt": "What color is the sky at different times of the day? 
Respond using JSON", "format": "json", "stream": false @@ -166,7 +166,7 @@ curl http://localhost:11434/api/generate -d '{ ```json { - "model": "llama2", + "model": "llama3", "created_at": "2023-11-09T21:07:55.186497Z", "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n", "done": true, @@ -289,7 +289,7 @@ If you want to set custom options for the model at runtime rather than in the Mo ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama2", + "model": "llama3", "prompt": "Why is the sky blue?", "stream": false, "options": { @@ -332,7 +332,7 @@ curl http://localhost:11434/api/generate -d '{ ```json { - "model": "llama2", + "model": "llama3", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "done": true, @@ -354,7 +354,7 @@ If an empty prompt is provided, the model will be loaded into memory. ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama2" + "model": "llama3" }' ``` @@ -364,7 +364,7 @@ A single JSON object is returned: ```json { - "model": "llama2", + "model": "llama3", "created_at": "2023-12-18T19:52:07.071755Z", "response": "", "done": true @@ -407,7 +407,7 @@ Send a chat message with a streaming response. ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama2", + "model": "llama3", "messages": [ { "role": "user", @@ -423,7 +423,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama2", + "model": "llama3", "created_at": "2023-08-04T08:52:19.385406455-07:00", "message": { "role": "assistant", @@ -438,7 +438,7 @@ Final response: ```json { - "model": "llama2", + "model": "llama3", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, "total_duration": 4883583458, @@ -456,7 +456,7 @@ Final response: ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama2", + "model": "llama3", "messages": [ { "role": "user", @@ -471,7 +471,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "registry.ollama.ai/library/llama2:latest", + "model": "registry.ollama.ai/library/llama3:latest", "created_at": "2023-12-12T14:13:43.416799Z", "message": { "role": "assistant", @@ -495,7 +495,7 @@ Send a chat message with a conversation history. You can use this same approach ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama2", + "model": "llama3", "messages": [ { "role": "user", @@ -519,7 +519,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama2", + "model": "llama3", "created_at": "2023-08-04T08:52:19.385406455-07:00", "message": { "role": "assistant", @@ -533,7 +533,7 @@ Final response: ```json { - "model": "llama2", + "model": "llama3", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, "total_duration": 8113331500, @@ -591,7 +591,7 @@ curl http://localhost:11434/api/chat -d '{ ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama2", + "model": "llama3", "messages": [ { "role": "user", @@ -609,7 +609,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "registry.ollama.ai/library/llama2:latest", + "model": "registry.ollama.ai/library/llama3:latest", "created_at": "2023-12-12T14:13:43.416799Z", "message": { "role": "assistant", @@ -651,7 +651,7 @@ Create a new model from a `Modelfile`. 
```shell curl http://localhost:11434/api/create -d '{ "name": "mario", - "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros." + "modelfile": "FROM llama3\nSYSTEM You are mario from Super Mario Bros." }' ``` @@ -758,7 +758,7 @@ A single JSON object will be returned. } }, { - "name": "llama2:latest", + "name": "llama3:latest", "modified_at": "2023-12-07T09:32:18.757212583-08:00", "size": 3825819519, "digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e", @@ -792,7 +792,7 @@ Show information about a model including details, modelfile, template, parameter ```shell curl http://localhost:11434/api/show -d '{ - "name": "llama2" + "name": "llama3" }' ``` @@ -827,8 +827,8 @@ Copy a model. Creates a model with another name from an existing model. ```shell curl http://localhost:11434/api/copy -d '{ - "source": "llama2", - "destination": "llama2-backup" + "source": "llama3", + "destination": "llama3-backup" }' ``` @@ -854,7 +854,7 @@ Delete a model and its data. ```shell curl -X DELETE http://localhost:11434/api/delete -d '{ - "name": "llama2:13b" + "name": "llama3:13b" }' ``` @@ -882,7 +882,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where ```shell curl http://localhost:11434/api/pull -d '{ - "name": "llama2" + "name": "llama3" }' ``` diff --git a/docs/faq.md b/docs/faq.md index 7ade43b7..109a1144 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter: ``` curl http://localhost:11434/api/generate -d '{ - "model": "llama2", + "model": "llama3", "prompt": "Why is the sky blue?", "options": { "num_ctx": 4096 @@ -88,9 +88,9 @@ On windows, Ollama inherits your user and system environment variables. 3. Edit or create New variable(s) for your user account for `OLLAMA_HOST`, `OLLAMA_MODELS`, etc. -4. Click OK/Apply to save +4. Click OK/Apply to save -5. Run `ollama` from a new terminal window +5. Run `ollama` from a new terminal window ## How can I expose Ollama on my network? @@ -221,14 +221,20 @@ The `keep_alive` parameter can be set to: For example, to preload a model and leave it in memory use: ```shell -curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": -1}' +curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": -1}' ``` To unload the model and free up memory use: ```shell -curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": 0}' +curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": 0}' ``` Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable. If you wish to override the `OLLAMA_KEEP_ALIVE` setting, use the `keep_alive` API parameter with the `/api/generate` or `/api/chat` API endpoints. + +## How do I manage the maximum number of requests the server can queue + +If too many requests are sent to the server, it will respond with a 503 error +indicating the server is overloaded. 
You can adjust how many requests may be +queue by setting `OLLAMA_MAX_QUEUE` \ No newline at end of file diff --git a/docs/modelfile.md b/docs/modelfile.md index 24002bde..21ee1826 100644 --- a/docs/modelfile.md +++ b/docs/modelfile.md @@ -10,7 +10,7 @@ A model file is the blueprint to create and share models with Ollama. - [Examples](#examples) - [Instructions](#instructions) - [FROM (Required)](#from-required) - - [Build from llama2](#build-from-llama2) + - [Build from llama3](#build-from-llama3) - [Build from a bin file](#build-from-a-bin-file) - [PARAMETER](#parameter) - [Valid Parameters and Values](#valid-parameters-and-values) @@ -48,7 +48,7 @@ INSTRUCTION arguments An example of a `Modelfile` creating a mario blueprint: ```modelfile -FROM llama2 +FROM llama3 # sets the temperature to 1 [higher is more creative, lower is more coherent] PARAMETER temperature 1 # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token @@ -67,33 +67,25 @@ To use this: More examples are available in the [examples directory](../examples). -### `Modelfile`s in [ollama.com/library][1] - -There are two ways to view `Modelfile`s underlying the models in [ollama.com/library][1]: - -- Option 1: view a details page from a model's tags page: - 1. Go to a particular model's tags (e.g. https://ollama.com/library/llama2/tags) - 2. Click on a tag (e.g. https://ollama.com/library/llama2:13b) - 3. Scroll down to "Layers" - - Note: if the [`FROM` instruction](#from-required) is not present, - it means the model was created from a local file -- Option 2: use `ollama show` to print the `Modelfile` for any local models like so: +To view the Modelfile of a given model, use the `ollama show --modelfile` command. ```bash - > ollama show --modelfile llama2:13b + > ollama show --modelfile llama3 # Modelfile generated by "ollama show" # To build a new Modelfile based on this one, replace the FROM line with: - # FROM llama2:13b + # FROM llama3:latest + FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29 + TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|> - FROM /root/.ollama/models/blobs/sha256:123abc - TEMPLATE """[INST] {{ if .System }}<>{{ .System }}<> + {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|> - {{ end }}{{ .Prompt }} [/INST] """ - SYSTEM """""" - PARAMETER stop [INST] - PARAMETER stop [/INST] - PARAMETER stop <> - PARAMETER stop <> + {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|> + + {{ .Response }}<|eot_id|>""" + PARAMETER stop "<|start_header_id|>" + PARAMETER stop "<|end_header_id|>" + PARAMETER stop "<|eot_id|>" + PARAMETER stop "<|reserved_special_token" ``` ## Instructions @@ -106,10 +98,10 @@ The `FROM` instruction defines the base model to use when creating a model. 
FROM : ``` -#### Build from llama2 +#### Build from llama3 ```modelfile -FROM llama2 +FROM llama3 ``` A list of available base models: diff --git a/docs/openai.md b/docs/openai.md index b4dc1f21..557b5846 100644 --- a/docs/openai.md +++ b/docs/openai.md @@ -25,7 +25,7 @@ chat_completion = client.chat.completions.create( 'content': 'Say this is a test', } ], - model='llama2', + model='llama3', ) ``` @@ -43,7 +43,7 @@ const openai = new OpenAI({ const chatCompletion = await openai.chat.completions.create({ messages: [{ role: 'user', content: 'Say this is a test' }], - model: 'llama2', + model: 'llama3', }) ``` @@ -53,7 +53,7 @@ const chatCompletion = await openai.chat.completions.create({ curl http://localhost:11434/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "llama2", + "model": "llama3", "messages": [ { "role": "system", @@ -113,7 +113,7 @@ curl http://localhost:11434/v1/chat/completions \ Before using a model, pull it locally `ollama pull`: ```shell -ollama pull llama2 +ollama pull llama3 ``` ### Default model names @@ -121,7 +121,7 @@ ollama pull llama2 For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name: ``` -ollama cp llama2 gpt-3.5-turbo +ollama cp llama3 gpt-3.5-turbo ``` Afterwards, this new model name can be specified the `model` field: diff --git a/docs/tutorials/langchainjs.md b/docs/tutorials/langchainjs.md index 7cd4012f..63b34aa6 100644 --- a/docs/tutorials/langchainjs.md +++ b/docs/tutorials/langchainjs.md @@ -15,7 +15,7 @@ import { Ollama } from "langchain/llms/ollama"; const ollama = new Ollama({ baseUrl: "http://localhost:11434", - model: "llama2", + model: "llama3", }); const answer = await ollama.invoke(`why is the sky blue?`); @@ -23,10 +23,10 @@ const answer = await ollama.invoke(`why is the sky blue?`); console.log(answer); ``` -That will get us the same thing as if we ran `ollama run llama2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app. +That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app. ```bash -npm install cheerio +npm install cheerio ``` ```javascript diff --git a/docs/windows.md b/docs/windows.md index 906dbfcc..72c5d32c 100644 --- a/docs/windows.md +++ b/docs/windows.md @@ -1,3 +1,4 @@ +<<<<<<< HEAD # Ollama Windows Preview Welcome to the Ollama Windows preview. 
@@ -27,7 +28,7 @@ Logs will often be helpful in diagnosing the problem (see Here's a quick example showing API access from `powershell` ```powershell -(Invoke-WebRequest -method POST -Body '{"model":"llama2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json +(Invoke-WebRequest -method POST -Body '{"model":"llama3", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json ``` ## Troubleshooting @@ -45,3 +46,17 @@ the explorer window by hitting `+R` and type in: - `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH) - `explorer %HOMEPATH%\.ollama` contains models and configuration - `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories + + +## Standalone CLI + +The easiest way to install Ollama on Windows is to use the `OllamaSetup.exe` +installer. It installs in your account without requiring Administrator rights. +We update Ollama regularly to support the latest models, and this installer will +help you keep up to date. + +If you'd like to install or integrate Ollama as a service, a standalone +`ollama-windows-amd64.zip` zip file is available containing only the Ollama CLI +and GPU library dependencies for Nvidia and AMD. This allows for embedding +Ollama in existing applications, or running it as a system service via `ollama +serve` with tools such as [NSSM](https://nssm.cc/). \ No newline at end of file diff --git a/examples/bash-comparemodels/README.md b/examples/bash-comparemodels/README.md index 91499255..65e66f1e 100644 --- a/examples/bash-comparemodels/README.md +++ b/examples/bash-comparemodels/README.md @@ -2,7 +2,7 @@ When calling `ollama`, you can pass it a file to run all the prompts in the file, one after the other: -`ollama run llama2 < sourcequestions.txt` +`ollama run llama3 < sourcequestions.txt` This concept is used in the following example. diff --git a/examples/go-chat/main.go b/examples/go-chat/main.go index 83aaad3d..5266f03e 100644 --- a/examples/go-chat/main.go +++ b/examples/go-chat/main.go @@ -35,7 +35,7 @@ func main() { ctx := context.Background() req := &api.ChatRequest{ - Model: "llama2", + Model: "llama3", Messages: messages, } diff --git a/examples/go-http-generate/main.go b/examples/go-http-generate/main.go index f4ca32f4..e5b64348 100644 --- a/examples/go-http-generate/main.go +++ b/examples/go-http-generate/main.go @@ -19,7 +19,7 @@ func main() { } defer resp.Body.Close() - + responseData, err := io.ReadAll(resp.Body) if err != nil { log.Fatal(err) diff --git a/examples/langchain-python-rag-document/main.py b/examples/langchain-python-rag-document/main.py index b9f98c4e..3ed9499f 100644 --- a/examples/langchain-python-rag-document/main.py +++ b/examples/langchain-python-rag-document/main.py @@ -40,9 +40,9 @@ while True: continue # Prompt - template = """Use the following pieces of context to answer the question at the end. - If you don't know the answer, just say that you don't know, don't try to make up an answer. - Use three sentences maximum and keep the answer as concise as possible. + template = """Use the following pieces of context to answer the question at the end. + If you don't know the answer, just say that you don't know, don't try to make up an answer. + Use three sentences maximum and keep the answer as concise as possible. 
{context} Question: {question} Helpful Answer:""" @@ -51,11 +51,11 @@ while True: template=template, ) - llm = Ollama(model="llama2:13b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])) + llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])) qa_chain = RetrievalQA.from_chain_type( llm, retriever=vectorstore.as_retriever(), chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}, ) - result = qa_chain({"query": query}) \ No newline at end of file + result = qa_chain({"query": query}) diff --git a/examples/langchain-python-rag-websummary/main.py b/examples/langchain-python-rag-websummary/main.py index cd2ef47f..d1b05ba8 100644 --- a/examples/langchain-python-rag-websummary/main.py +++ b/examples/langchain-python-rag-websummary/main.py @@ -1,12 +1,12 @@ -from langchain.llms import Ollama -from langchain.document_loaders import WebBaseLoader +from langchain_community.llms import Ollama +from langchain_community.document_loaders import WebBaseLoader from langchain.chains.summarize import load_summarize_chain loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally") docs = loader.load() -llm = Ollama(model="llama2") +llm = Ollama(model="llama3") chain = load_summarize_chain(llm, chain_type="stuff") -result = chain.run(docs) +result = chain.invoke(docs) print(result) diff --git a/examples/langchain-python-simple/README.md b/examples/langchain-python-simple/README.md index 3f401ca8..d4102dec 100644 --- a/examples/langchain-python-simple/README.md +++ b/examples/langchain-python-simple/README.md @@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama. ## Running the Example -1. Ensure you have the `llama2` model installed: +1. Ensure you have the `llama3` model installed: ```bash - ollama pull llama2 + ollama pull llama3 ``` 2. Install the Python Requirements. @@ -21,4 +21,3 @@ This example is a basic "hello world" of using LangChain with Ollama. ```bash python main.py ``` - \ No newline at end of file diff --git a/examples/langchain-python-simple/main.py b/examples/langchain-python-simple/main.py index da696e00..7cb65286 100644 --- a/examples/langchain-python-simple/main.py +++ b/examples/langchain-python-simple/main.py @@ -1,6 +1,6 @@ from langchain.llms import Ollama input = input("What is your question?") -llm = Ollama(model="llama2") +llm = Ollama(model="llama3") res = llm.predict(input) print (res) diff --git a/examples/modelfile-mario/Modelfile b/examples/modelfile-mario/Modelfile index 35c787fc..33d5952b 100644 --- a/examples/modelfile-mario/Modelfile +++ b/examples/modelfile-mario/Modelfile @@ -1,4 +1,4 @@ -FROM llama2 +FROM llama3 PARAMETER temperature 1 SYSTEM """ You are Mario from super mario bros, acting as an assistant. diff --git a/examples/modelfile-mario/readme.md b/examples/modelfile-mario/readme.md index 0d72dddc..e4f0d417 100644 --- a/examples/modelfile-mario/readme.md +++ b/examples/modelfile-mario/readme.md @@ -2,12 +2,12 @@ # Example character: Mario -This example shows how to create a basic character using Llama2 as the base model. +This example shows how to create a basic character using Llama3 as the base model. To run this example: 1. Download the Modelfile -2. `ollama pull llama2` to get the base model used in the model file. +2. `ollama pull llama3` to get the base model used in the model file. 3. `ollama create NAME -f ./Modelfile` 4. `ollama run NAME` @@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?" 
What the model file looks like: ``` -FROM llama2 +FROM llama3 PARAMETER temperature 1 SYSTEM """ You are Mario from Super Mario Bros, acting as an assistant. diff --git a/examples/python-json-datagenerator/predefinedschema.py b/examples/python-json-datagenerator/predefinedschema.py index abc399c4..1fd54892 100644 --- a/examples/python-json-datagenerator/predefinedschema.py +++ b/examples/python-json-datagenerator/predefinedschema.py @@ -2,16 +2,16 @@ import requests import json import random -model = "llama2" +model = "llama3" template = { - "firstName": "", - "lastName": "", + "firstName": "", + "lastName": "", "address": { - "street": "", - "city": "", - "state": "", + "street": "", + "city": "", + "state": "", "zipCode": "" - }, + }, "phoneNumber": "" } diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py index 5f27448f..72b1fefb 100644 --- a/examples/python-json-datagenerator/randomaddresses.py +++ b/examples/python-json-datagenerator/randomaddresses.py @@ -12,7 +12,7 @@ countries = [ "France", ] country = random.choice(countries) -model = "llama2" +model = "llama3" prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters." diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md index 369fb2a5..88357044 100644 --- a/examples/python-json-datagenerator/readme.md +++ b/examples/python-json-datagenerator/readme.md @@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran ## Running the Example -1. Ensure you have the `llama2` model installed: +1. Ensure you have the `llama3` model installed: ```bash - ollama pull llama2 + ollama pull llama3 ``` 2. Install the Python Requirements. diff --git a/examples/python-simplechat/client.py b/examples/python-simplechat/client.py index 768a2289..9ae99fb7 100644 --- a/examples/python-simplechat/client.py +++ b/examples/python-simplechat/client.py @@ -2,7 +2,7 @@ import json import requests # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve` -model = "llama2" # TODO: update this for whatever model you wish to use +model = "llama3" # TODO: update this for whatever model you wish to use def chat(messages): diff --git a/examples/python-simplechat/readme.md b/examples/python-simplechat/readme.md index 204a8159..dd2576bc 100644 --- a/examples/python-simplechat/readme.md +++ b/examples/python-simplechat/readme.md @@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam ## Running the Example -1. Ensure you have the `llama2` model installed: +1. Ensure you have the `llama3` model installed: ```bash - ollama pull llama2 + ollama pull llama3 ``` 2. Install the Python Requirements. diff --git a/examples/typescript-mentors/README.md b/examples/typescript-mentors/README.md index c3ce9c82..d3611a5e 100644 --- a/examples/typescript-mentors/README.md +++ b/examples/typescript-mentors/README.md @@ -4,10 +4,10 @@ This example demonstrates how one would create a set of 'mentors' you can have a ## Usage -1. Add llama2 to have the mentors ask your questions: +1. Add llama3 to have the mentors ask your questions: ```bash - ollama pull llama2 + ollama pull llama3 ``` 2. 
Install prerequisites: diff --git a/examples/typescript-mentors/character-generator.ts b/examples/typescript-mentors/character-generator.ts index 886eec67..dc5d2f5e 100644 --- a/examples/typescript-mentors/character-generator.ts +++ b/examples/typescript-mentors/character-generator.ts @@ -15,7 +15,7 @@ async function characterGenerator() { ollama.setModel("stablebeluga2:70b-q4_K_M"); const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. `); - const thecontents = `FROM llama2\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`; + const thecontents = `FROM llama3\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`; fs.writeFile(path.join(directory, 'Modelfile'), thecontents, (err: any) => { if (err) throw err; @@ -23,4 +23,4 @@ async function characterGenerator() { }); } -characterGenerator(); \ No newline at end of file +characterGenerator(); diff --git a/examples/typescript-simplechat/client.ts b/examples/typescript-simplechat/client.ts index 3e571ab6..a1e0eea3 100644 --- a/examples/typescript-simplechat/client.ts +++ b/examples/typescript-simplechat/client.ts @@ -1,6 +1,6 @@ import * as readline from "readline"; -const model = "llama2"; +const model = "llama3"; type Message = { role: "assistant" | "user" | "system"; content: string; @@ -74,4 +74,4 @@ async function main() { } -main(); \ No newline at end of file +main(); diff --git a/gpu/assets.go b/gpu/assets.go index f9b018cd..911a6977 100644 --- a/gpu/assets.go +++ b/gpu/assets.go @@ -12,6 +12,8 @@ import ( "sync" "syscall" "time" + + "github.com/ollama/ollama/server/envconfig" ) var ( @@ -24,45 +26,8 @@ func PayloadsDir() (string, error) { defer lock.Unlock() var err error if payloadsDir == "" { - runnersDir := os.Getenv("OLLAMA_RUNNERS_DIR") - // On Windows we do not carry the payloads inside the main executable - if runtime.GOOS == "windows" && runnersDir == "" { - appExe, err := os.Executable() - if err != nil { - slog.Error("failed to lookup executable path", "error", err) - return "", err - } + runnersDir := envconfig.RunnersDir - cwd, err := os.Getwd() - if err != nil { - slog.Error("failed to lookup working directory", "error", err) - return "", err - } - - var paths []string - for _, root := range []string{filepath.Dir(appExe), cwd} { - paths = append(paths, - filepath.Join(root), - filepath.Join(root, "windows-"+runtime.GOARCH), - filepath.Join(root, "dist", "windows-"+runtime.GOARCH), - ) - } - - // Try a few variations to improve developer experience when building from source in the local tree - for _, p := range paths { - candidate := filepath.Join(p, "ollama_runners") - _, err := os.Stat(candidate) - if err == nil { - runnersDir = candidate - break - } - } - if runnersDir == "" { - err = fmt.Errorf("unable 
to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'") - slog.Error("incomplete distribution", "error", err) - return "", err - } - } if runnersDir != "" { payloadsDir = runnersDir return payloadsDir, nil @@ -70,7 +35,7 @@ func PayloadsDir() (string, error) { // The remainder only applies on non-windows where we still carry payloads in the main executable cleanupTmpDirs() - tmpDir := os.Getenv("OLLAMA_TMPDIR") + tmpDir := envconfig.TmpDir if tmpDir == "" { tmpDir, err = os.MkdirTemp("", "ollama") if err != nil { @@ -133,7 +98,7 @@ func cleanupTmpDirs() { func Cleanup() { lock.Lock() defer lock.Unlock() - runnersDir := os.Getenv("OLLAMA_RUNNERS_DIR") + runnersDir := envconfig.RunnersDir if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" { // We want to fully clean up the tmpdir parent of the payloads dir tmpDir := filepath.Clean(filepath.Join(payloadsDir, "..")) diff --git a/gpu/gpu.go b/gpu/gpu.go index 9b915015..a056a90b 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -21,6 +21,7 @@ import ( "unsafe" "github.com/ollama/ollama/format" + "github.com/ollama/ollama/server/envconfig" ) type handles struct { @@ -268,7 +269,7 @@ func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) { } func getVerboseState() C.uint16_t { - if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" { + if envconfig.Debug { return C.uint16_t(1) } return C.uint16_t(0) diff --git a/integration/max_queue_test.go b/integration/max_queue_test.go new file mode 100644 index 00000000..43b15c6c --- /dev/null +++ b/integration/max_queue_test.go @@ -0,0 +1,117 @@ +//go:build integration + +package integration + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "strconv" + "strings" + "sync" + "testing" + "time" + + "github.com/ollama/ollama/api" + "github.com/stretchr/testify/require" +) + +func TestMaxQueue(t *testing.T) { + // Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU + // Also note that by default Darwin can't sustain > ~128 connections without adjusting limits + threadCount := 32 + mq := os.Getenv("OLLAMA_MAX_QUEUE") + if mq != "" { + var err error + threadCount, err = strconv.Atoi(mq) + require.NoError(t, err) + } else { + os.Setenv("OLLAMA_MAX_QUEUE", fmt.Sprintf("%d", threadCount)) + } + + req := api.GenerateRequest{ + Model: "orca-mini", + Prompt: "write a long historical fiction story about christopher columbus. 
use at least 10 facts from his actual journey", + Options: map[string]interface{}{ + "seed": 42, + "temperature": 0.0, + }, + } + resp := []string{"explore", "discover", "ocean"} + + // CPU mode takes much longer at the limit with a large queue setting + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + client, _, cleanup := InitServerConnection(ctx, t) + defer cleanup() + + require.NoError(t, PullIfMissing(ctx, client, req.Model)) + + // Context for the worker threads so we can shut them down + // embedCtx, embedCancel := context.WithCancel(ctx) + embedCtx := ctx + + var genwg sync.WaitGroup + go func() { + genwg.Add(1) + defer genwg.Done() + slog.Info("Starting generate request") + DoGenerate(ctx, t, client, req, resp, 45*time.Second, 5*time.Second) + slog.Info("generate completed") + }() + + // Give the generate a chance to get started before we start hammering on embed requests + time.Sleep(5 * time.Millisecond) + + threadCount += 10 // Add a few extra to ensure we push the queue past its limit + busyCount := 0 + resetByPeerCount := 0 + canceledCount := 0 + succesCount := 0 + counterMu := sync.Mutex{} + var embedwg sync.WaitGroup + for i := 0; i < threadCount; i++ { + go func(i int) { + embedwg.Add(1) + defer embedwg.Done() + slog.Info("embed started", "id", i) + embedReq := api.EmbeddingRequest{ + Model: req.Model, + Prompt: req.Prompt, + Options: req.Options, + } + // Fresh client for every request + client, _ = GetTestEndpoint() + + resp, genErr := client.Embeddings(embedCtx, &embedReq) + counterMu.Lock() + defer counterMu.Unlock() + switch { + case genErr == nil: + succesCount++ + require.Greater(t, len(resp.Embedding), 5) // somewhat arbitrary, but sufficient to be reasonable + case errors.Is(genErr, context.Canceled): + canceledCount++ + case strings.Contains(genErr.Error(), "busy"): + busyCount++ + case strings.Contains(genErr.Error(), "connection reset by peer"): + resetByPeerCount++ + default: + require.NoError(t, genErr, "%d request failed", i) + } + + slog.Info("embed finished", "id", i) + }(i) + } + genwg.Wait() + slog.Info("generate done, waiting for embeds") + embedwg.Wait() + + require.Equal(t, resetByPeerCount, 0, "Connections reset by peer, have you updated your fd and socket limits?") + require.True(t, busyCount > 0, "no requests hit busy error but some should have") + require.True(t, canceledCount == 0, "no requests should have been canceled due to timeout") + + slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount) +} diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp index 3448bcc5..df28c412 100644 --- a/llm/ext_server/server.cpp +++ b/llm/ext_server/server.cpp @@ -1186,8 +1186,6 @@ struct llama_server_context {"model", params.model_alias}, {"tokens_predicted", slot.n_decoded}, {"tokens_evaluated", slot.n_prompt_tokens}, - {"generation_settings", get_formated_generation(slot)}, - {"prompt", slot.prompt}, {"truncated", slot.truncated}, {"stopped_eos", slot.stopped_eos}, {"stopped_word", slot.stopped_word}, diff --git a/llm/memory.go b/llm/memory.go index b705aefe..661a0c50 100644 --- a/llm/memory.go +++ b/llm/memory.go @@ -3,12 +3,11 @@ package llm import ( "fmt" "log/slog" - "os" - "strconv" "github.com/ollama/ollama/api" "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" + "github.com/ollama/ollama/server/envconfig" ) // This algorithm looks for a complete fit to determine if we need to unload other models @@ -50,15 +49,8 
@@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts for _, info := range gpus { memoryAvailable += info.FreeMemory } - userLimit := os.Getenv("OLLAMA_MAX_VRAM") - if userLimit != "" { - avail, err := strconv.ParseUint(userLimit, 10, 64) - if err != nil { - slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err) - } else { - slog.Info("user override memory limit", "OLLAMA_MAX_VRAM", avail, "actual", memoryAvailable) - memoryAvailable = avail - } + if envconfig.MaxVRAM > 0 { + memoryAvailable = envconfig.MaxVRAM } slog.Debug("evaluating", "library", gpus[0].Library, "gpu_count", len(gpus), "available", format.HumanBytes2(memoryAvailable)) diff --git a/llm/patches/05-clip-fix.diff b/llm/patches/05-clip-fix.diff new file mode 100644 index 00000000..3f68a5bb --- /dev/null +++ b/llm/patches/05-clip-fix.diff @@ -0,0 +1,24 @@ +diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp +index e3c9bcd4..b43f892d 100644 +--- a/examples/llava/clip.cpp ++++ b/examples/llava/clip.cpp +@@ -573,14 +573,16 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 + struct ggml_tensor * embeddings = inp; + if (ctx->has_class_embedding) { + embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size); ++ } ++ ggml_set_name(embeddings, "embeddings"); ++ ggml_set_input(embeddings); ++ ++ if (ctx->has_class_embedding) { + embeddings = ggml_acc(ctx0, embeddings, model.class_embedding, + embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0); + embeddings = ggml_acc(ctx0, embeddings, inp, + embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]); + } +- ggml_set_name(embeddings, "embeddings"); +- ggml_set_input(embeddings); +- + + struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_positions); + ggml_set_name(positions, "positions"); diff --git a/llm/server.go b/llm/server.go index b41f393d..44bada08 100644 --- a/llm/server.go +++ b/llm/server.go @@ -26,6 +26,7 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" + "github.com/ollama/ollama/server/envconfig" ) type LlamaServer interface { @@ -124,7 +125,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr } else { servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant } - demandLib := strings.Trim(os.Getenv("OLLAMA_LLM_LIBRARY"), "\"' ") + demandLib := envconfig.LLMLibrary if demandLib != "" { serverPath := availableServers[demandLib] if serverPath == "" { @@ -145,7 +146,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr "--batch-size", fmt.Sprintf("%d", opts.NumBatch), "--embedding", } - if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" { + if envconfig.Debug { params = append(params, "--log-format", "json") } else { params = append(params, "--log-disable") @@ -155,7 +156,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU)) } - if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" { + if envconfig.Debug { params = append(params, "--verbose") } @@ -193,16 +194,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr params = append(params, "--numa") } - // "--cont-batching", // TODO - doesn't seem to have any noticeable perf change for multiple requests - numParallel := 1 - if onp := 
os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" { - numParallel, err = strconv.Atoi(onp) - if err != nil || numParallel <= 0 { - err = fmt.Errorf("invalid OLLAMA_NUM_PARALLEL=%s must be greater than zero - %w", onp, err) - slog.Error("misconfiguration", "error", err) - return nil, err - } + numParallel := envconfig.NumParallel + + // TODO (jmorganca): multimodal models don't support parallel yet + // see https://github.com/ollama/ollama/issues/4165 + if len(projectors) > 0 { + numParallel = 1 + slog.Warn("multimodal models don't support parallel requests yet") } + params = append(params, "--parallel", fmt.Sprintf("%d", numParallel)) for i := 0; i < len(servers); i++ { diff --git a/server/envconfig/config.go b/server/envconfig/config.go new file mode 100644 index 00000000..9ad68180 --- /dev/null +++ b/server/envconfig/config.go @@ -0,0 +1,174 @@ +package envconfig + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" +) + +var ( + // Set via OLLAMA_ORIGINS in the environment + AllowOrigins []string + // Set via OLLAMA_DEBUG in the environment + Debug bool + // Set via OLLAMA_LLM_LIBRARY in the environment + LLMLibrary string + // Set via OLLAMA_MAX_LOADED_MODELS in the environment + MaxRunners int + // Set via OLLAMA_MAX_QUEUE in the environment + MaxQueuedRequests int + // Set via OLLAMA_MAX_VRAM in the environment + MaxVRAM uint64 + // Set via OLLAMA_NOPRUNE in the environment + NoPrune bool + // Set via OLLAMA_NUM_PARALLEL in the environment + NumParallel int + // Set via OLLAMA_RUNNERS_DIR in the environment + RunnersDir string + // Set via OLLAMA_TMPDIR in the environment + TmpDir string +) + +func AsMap() map[string]string { + return map[string]string{ + "OLLAMA_ORIGINS": fmt.Sprintf("%v", AllowOrigins), + "OLLAMA_DEBUG": fmt.Sprintf("%v", Debug), + "OLLAMA_LLM_LIBRARY": fmt.Sprintf("%v", LLMLibrary), + "OLLAMA_MAX_LOADED_MODELS": fmt.Sprintf("%v", MaxRunners), + "OLLAMA_MAX_QUEUE": fmt.Sprintf("%v", MaxQueuedRequests), + "OLLAMA_MAX_VRAM": fmt.Sprintf("%v", MaxVRAM), + "OLLAMA_NOPRUNE": fmt.Sprintf("%v", NoPrune), + "OLLAMA_NUM_PARALLEL": fmt.Sprintf("%v", NumParallel), + "OLLAMA_RUNNERS_DIR": fmt.Sprintf("%v", RunnersDir), + "OLLAMA_TMPDIR": fmt.Sprintf("%v", TmpDir), + } +} + +var defaultAllowOrigins = []string{ + "localhost", + "127.0.0.1", + "0.0.0.0", +} + +// Clean quotes and spaces from the value +func clean(key string) string { + return strings.Trim(os.Getenv(key), "\"' ") +} + +func init() { + // default values + NumParallel = 1 + MaxRunners = 1 + MaxQueuedRequests = 512 + + LoadConfig() +} + +func LoadConfig() { + if debug := clean("OLLAMA_DEBUG"); debug != "" { + d, err := strconv.ParseBool(debug) + if err == nil { + Debug = d + } else { + Debug = true + } + } + + RunnersDir = clean("OLLAMA_RUNNERS_DIR") + if runtime.GOOS == "windows" && RunnersDir == "" { + // On Windows we do not carry the payloads inside the main executable + appExe, err := os.Executable() + if err != nil { + slog.Error("failed to lookup executable path", "error", err) + } + + cwd, err := os.Getwd() + if err != nil { + slog.Error("failed to lookup working directory", "error", err) + } + + var paths []string + for _, root := range []string{filepath.Dir(appExe), cwd} { + paths = append(paths, + filepath.Join(root), + filepath.Join(root, "windows-"+runtime.GOARCH), + filepath.Join(root, "dist", "windows-"+runtime.GOARCH), + ) + } + + // Try a few variations to improve developer experience when building from source in the local tree + for _, p := range paths { + candidate 
:= filepath.Join(p, "ollama_runners") + _, err := os.Stat(candidate) + if err == nil { + RunnersDir = candidate + break + } + } + if RunnersDir == "" { + slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'") + } + } + + TmpDir = clean("OLLAMA_TMPDIR") + + userLimit := clean("OLLAMA_MAX_VRAM") + if userLimit != "" { + avail, err := strconv.ParseUint(userLimit, 10, 64) + if err != nil { + slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err) + } else { + MaxVRAM = avail + } + } + + LLMLibrary = clean("OLLAMA_LLM_LIBRARY") + + if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" { + val, err := strconv.Atoi(onp) + if err != nil || val <= 0 { + slog.Error("invalid setting must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err) + } else { + NumParallel = val + } + } + + if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" { + NoPrune = true + } + + if origins := clean("OLLAMA_ORIGINS"); origins != "" { + AllowOrigins = strings.Split(origins, ",") + } + for _, allowOrigin := range defaultAllowOrigins { + AllowOrigins = append(AllowOrigins, + fmt.Sprintf("http://%s", allowOrigin), + fmt.Sprintf("https://%s", allowOrigin), + fmt.Sprintf("http://%s:*", allowOrigin), + fmt.Sprintf("https://%s:*", allowOrigin), + ) + } + + maxRunners := clean("OLLAMA_MAX_LOADED_MODELS") + if maxRunners != "" { + m, err := strconv.Atoi(maxRunners) + if err != nil { + slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err) + } else { + MaxRunners = m + } + } + + if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" { + p, err := strconv.Atoi(onp) + if err != nil || p <= 0 { + slog.Error("invalid setting", "OLLAMA_MAX_QUEUE", onp, "error", err) + } else { + MaxQueuedRequests = p + } + } +} diff --git a/server/envconfig/config_test.go b/server/envconfig/config_test.go new file mode 100644 index 00000000..b2760299 --- /dev/null +++ b/server/envconfig/config_test.go @@ -0,0 +1,20 @@ +package envconfig + +import ( + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestConfig(t *testing.T) { + os.Setenv("OLLAMA_DEBUG", "") + LoadConfig() + require.False(t, Debug) + os.Setenv("OLLAMA_DEBUG", "false") + LoadConfig() + require.False(t, Debug) + os.Setenv("OLLAMA_DEBUG", "1") + LoadConfig() + require.True(t, Debug) +} diff --git a/server/images.go b/server/images.go index 68840c1a..76205392 100644 --- a/server/images.go +++ b/server/images.go @@ -29,7 +29,7 @@ import ( "github.com/ollama/ollama/convert" "github.com/ollama/ollama/format" "github.com/ollama/ollama/llm" - "github.com/ollama/ollama/parser" + "github.com/ollama/ollama/server/envconfig" "github.com/ollama/ollama/types/errtypes" "github.com/ollama/ollama/types/model" "github.com/ollama/ollama/version" @@ -63,46 +63,74 @@ func (m *Model) IsEmbedding() bool { return slices.Contains(m.Config.ModelFamilies, "bert") || slices.Contains(m.Config.ModelFamilies, "nomic-bert") } -func (m *Model) Commands() (cmds []parser.Command) { - cmds = append(cmds, parser.Command{Name: "model", Args: m.ModelPath}) +func (m *Model) String() string { + var modelfile model.File + + modelfile.Commands = append(modelfile.Commands, model.Command{ + Name: "model", + Args: m.ModelPath, + }) if m.Template != "" { - cmds = append(cmds, parser.Command{Name: "template", Args: m.Template}) + modelfile.Commands = append(modelfile.Commands, model.Command{ + Name: "template", + Args: m.Template, + }) } if m.System != "" { - cmds = append(cmds, parser.Command{Name: 
"system", Args: m.System}) + modelfile.Commands = append(modelfile.Commands, model.Command{ + Name: "system", + Args: m.System, + }) } for _, adapter := range m.AdapterPaths { - cmds = append(cmds, parser.Command{Name: "adapter", Args: adapter}) + modelfile.Commands = append(modelfile.Commands, model.Command{ + Name: "adapter", + Args: adapter, + }) } for _, projector := range m.ProjectorPaths { - cmds = append(cmds, parser.Command{Name: "projector", Args: projector}) + modelfile.Commands = append(modelfile.Commands, model.Command{ + Name: "projector", + Args: projector, + }) } for k, v := range m.Options { switch v := v.(type) { case []any: for _, s := range v { - cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", s)}) + modelfile.Commands = append(modelfile.Commands, model.Command{ + Name: k, + Args: fmt.Sprintf("%v", s), + }) } default: - cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", v)}) + modelfile.Commands = append(modelfile.Commands, model.Command{ + Name: k, + Args: fmt.Sprintf("%v", v), + }) } } for _, license := range m.License { - cmds = append(cmds, parser.Command{Name: "license", Args: license}) + modelfile.Commands = append(modelfile.Commands, model.Command{ + Name: "license", + Args: license, + }) } for _, msg := range m.Messages { - cmds = append(cmds, parser.Command{Name: "message", Args: fmt.Sprintf("%s %s", msg.Role, msg.Content)}) + modelfile.Commands = append(modelfile.Commands, model.Command{ + Name: "message", + Args: fmt.Sprintf("%s %s", msg.Role, msg.Content), + }) } - return cmds - + return modelfile.String() } type Message struct { @@ -329,7 +357,7 @@ func realpath(mfDir, from string) string { return abspath } -func CreateModel(ctx context.Context, name, modelFileDir, quantization string, commands []parser.Command, fn func(resp api.ProgressResponse)) error { +func CreateModel(ctx context.Context, name, modelFileDir, quantization string, modelfile *model.File, fn func(resp api.ProgressResponse)) error { deleteMap := make(map[string]struct{}) if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil { for _, layer := range append(manifest.Layers, manifest.Config) { @@ -351,7 +379,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c params := make(map[string][]string) fromParams := make(map[string]any) - for _, c := range commands { + for _, c := range modelfile.Commands { mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name) switch c.Name { @@ -668,7 +696,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c return err } - if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" { + if !envconfig.NoPrune { if err := deleteUnusedLayers(nil, deleteMap, false); err != nil { return err } @@ -999,7 +1027,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu // build deleteMap to prune unused layers deleteMap := make(map[string]struct{}) - if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" { + if !envconfig.NoPrune { manifest, _, err = GetManifest(mp) if err != nil && !errors.Is(err, os.ErrNotExist) { return err diff --git a/server/modelpath.go b/server/modelpath.go index 7d333876..86908226 100644 --- a/server/modelpath.go +++ b/server/modelpath.go @@ -6,6 +6,7 @@ import ( "net/url" "os" "path/filepath" + "regexp" "strings" ) @@ -25,9 +26,10 @@ const ( ) var ( - ErrInvalidImageFormat = errors.New("invalid image format") - ErrInvalidProtocol = errors.New("invalid protocol scheme") - ErrInsecureProtocol = 
errors.New("insecure protocol http") + ErrInvalidImageFormat = errors.New("invalid image format") + ErrInvalidProtocol = errors.New("invalid protocol scheme") + ErrInsecureProtocol = errors.New("insecure protocol http") + ErrInvalidDigestFormat = errors.New("invalid digest format") ) func ParseModelPath(name string) ModelPath { @@ -149,6 +151,17 @@ func GetBlobsPath(digest string) (string, error) { return "", err } + // only accept actual sha256 digests + pattern := "^sha256[:-][0-9a-fA-F]{64}$" + re := regexp.MustCompile(pattern) + if err != nil { + return "", err + } + + if digest != "" && !re.MatchString(digest) { + return "", ErrInvalidDigestFormat + } + digest = strings.ReplaceAll(digest, ":", "-") path := filepath.Join(dir, "blobs", digest) dirPath := filepath.Dir(path) diff --git a/server/modelpath_test.go b/server/modelpath_test.go index 8b26d52c..30741d87 100644 --- a/server/modelpath_test.go +++ b/server/modelpath_test.go @@ -1,6 +1,73 @@ package server -import "testing" +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGetBlobsPath(t *testing.T) { + // GetBlobsPath expects an actual directory to exist + dir, err := os.MkdirTemp("", "ollama-test") + assert.Nil(t, err) + defer os.RemoveAll(dir) + + tests := []struct { + name string + digest string + expected string + err error + }{ + { + "empty digest", + "", + filepath.Join(dir, "blobs"), + nil, + }, + { + "valid with colon", + "sha256:456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9", + filepath.Join(dir, "blobs", "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9"), + nil, + }, + { + "valid with dash", + "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9", + filepath.Join(dir, "blobs", "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9"), + nil, + }, + { + "digest too short", + "sha256-45640291", + "", + ErrInvalidDigestFormat, + }, + { + "digest too long", + "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9aaaaaaaaaa", + "", + ErrInvalidDigestFormat, + }, + { + "digest invalid chars", + "../sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7a", + "", + ErrInvalidDigestFormat, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Setenv("OLLAMA_MODELS", dir) + + got, err := GetBlobsPath(tc.digest) + + assert.ErrorIs(t, tc.err, err, tc.name) + assert.Equal(t, tc.expected, got, tc.name) + }) + } +} func TestParseModelPath(t *testing.T) { tests := []struct { diff --git a/server/routes.go b/server/routes.go index 480527f2..e878598a 100644 --- a/server/routes.go +++ b/server/routes.go @@ -1,6 +1,7 @@ package server import ( + "cmp" "context" "encoding/json" "errors" @@ -28,7 +29,7 @@ import ( "github.com/ollama/ollama/gpu" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/openai" - "github.com/ollama/ollama/parser" + "github.com/ollama/ollama/server/envconfig" "github.com/ollama/ollama/types/model" "github.com/ollama/ollama/version" ) @@ -146,12 +147,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { select { case runner = <-rCh: case err = <-eCh: - if errors.Is(err, context.Canceled) { - c.JSON(499, gin.H{"error": "request canceled"}) - return - } - - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + handleErrorResponse(c, err) return } @@ -394,12 +390,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) { select { case runner = <-rCh: case err = <-eCh: - if errors.Is(err, context.Canceled) { - 
c.JSON(499, gin.H{"error": "request canceled"}) - return - } - - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + handleErrorResponse(c, err) return } @@ -522,28 +513,17 @@ func (s *Server) PushModelHandler(c *gin.Context) { func (s *Server) CreateModelHandler(c *gin.Context) { var req api.CreateRequest - err := c.ShouldBindJSON(&req) - switch { - case errors.Is(err, io.EOF): + if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"}) return - case err != nil: + } else if err != nil { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) return } - var model string - if req.Model != "" { - model = req.Model - } else if req.Name != "" { - model = req.Name - } else { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"}) - return - } - - if err := ParseModelPath(model).Validate(); err != nil { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + name := model.ParseName(cmp.Or(req.Model, req.Name)) + if !name.IsValid() { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"}) return } @@ -552,19 +532,19 @@ func (s *Server) CreateModelHandler(c *gin.Context) { return } - var modelfile io.Reader = strings.NewReader(req.Modelfile) + var r io.Reader = strings.NewReader(req.Modelfile) if req.Path != "" && req.Modelfile == "" { - mf, err := os.Open(req.Path) + f, err := os.Open(req.Path) if err != nil { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)}) return } - defer mf.Close() + defer f.Close() - modelfile = mf + r = f } - commands, err := parser.Parse(modelfile) + modelfile, err := model.ParseFile(r) if err != nil { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) return @@ -580,7 +560,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) { ctx, cancel := context.WithCancel(c.Request.Context()) defer cancel() - if err := CreateModel(ctx, model, filepath.Dir(req.Path), req.Quantization, commands, fn); err != nil { + if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), req.Quantization, modelfile, fn); err != nil { ch <- gin.H{"error": err.Error()} } }() @@ -732,7 +712,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) { fmt.Fprintln(&sb, "# Modelfile generate by \"ollama show\"") fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:") fmt.Fprintf(&sb, "# FROM %s\n\n", model.ShortName) - fmt.Fprint(&sb, parser.Format(model.Commands())) + fmt.Fprint(&sb, model.String()) resp.Modelfile = sb.String() return resp, nil @@ -880,12 +860,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) { c.Status(http.StatusCreated) } -var defaultAllowOrigins = []string{ - "localhost", - "127.0.0.1", - "0.0.0.0", -} - func isLocalIP(ip netip.Addr) bool { if interfaces, err := net.Interfaces(); err == nil { for _, iface := range interfaces { @@ -969,19 +943,7 @@ func (s *Server) GenerateRoutes() http.Handler { config := cors.DefaultConfig() config.AllowWildcard = true config.AllowBrowserExtensions = true - - if allowedOrigins := strings.Trim(os.Getenv("OLLAMA_ORIGINS"), "\"'"); allowedOrigins != "" { - config.AllowOrigins = strings.Split(allowedOrigins, ",") - } - - for _, allowOrigin := range defaultAllowOrigins { - config.AllowOrigins = append(config.AllowOrigins, - fmt.Sprintf("http://%s", allowOrigin), - fmt.Sprintf("https://%s", allowOrigin), - 
fmt.Sprintf("http://%s:*", allowOrigin), - fmt.Sprintf("https://%s:*", allowOrigin), - ) - } + config.AllowOrigins = envconfig.AllowOrigins r := gin.Default() r.Use( @@ -1020,10 +982,11 @@ func (s *Server) GenerateRoutes() http.Handler { func Serve(ln net.Listener) error { level := slog.LevelInfo - if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" { + if envconfig.Debug { level = slog.LevelDebug } + slog.Info("server config", "env", envconfig.AsMap()) handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{ Level: level, AddSource: true, @@ -1047,7 +1010,7 @@ func Serve(ln net.Listener) error { return err } - if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" { + if !envconfig.NoPrune { // clean up unused layers and manifests if err := PruneLayers(); err != nil { return err @@ -1223,12 +1186,7 @@ func (s *Server) ChatHandler(c *gin.Context) { select { case runner = <-rCh: case err = <-eCh: - if errors.Is(err, context.Canceled) { - c.JSON(499, gin.H{"error": "request canceled"}) - return - } - - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + handleErrorResponse(c, err) return } @@ -1349,3 +1307,15 @@ func (s *Server) ChatHandler(c *gin.Context) { streamResponse(c, ch) } + +func handleErrorResponse(c *gin.Context, err error) { + if errors.Is(err, context.Canceled) { + c.JSON(499, gin.H{"error": "request canceled"}) + return + } + if errors.Is(err, ErrMaxQueue) { + c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()}) + return + } + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) +} diff --git a/server/routes_test.go b/server/routes_test.go index 6ac98367..27e53cbd 100644 --- a/server/routes_test.go +++ b/server/routes_test.go @@ -17,7 +17,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/ollama/ollama/api" - "github.com/ollama/ollama/parser" + "github.com/ollama/ollama/types/model" "github.com/ollama/ollama/version" ) @@ -55,13 +55,13 @@ func Test_Routes(t *testing.T) { createTestModel := func(t *testing.T, name string) { fname := createTestFile(t, "ollama-model") - modelfile := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname)) - commands, err := parser.Parse(modelfile) + r := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname)) + modelfile, err := model.ParseFile(r) assert.Nil(t, err) fn := func(resp api.ProgressResponse) { t.Logf("Status: %s", resp.Status) } - err = CreateModel(context.TODO(), name, "", "", commands, fn) + err = CreateModel(context.TODO(), name, "", "", modelfile, fn) assert.Nil(t, err) } diff --git a/server/sched.go b/server/sched.go index 61c5e1b3..164814a3 100644 --- a/server/sched.go +++ b/server/sched.go @@ -5,10 +5,8 @@ import ( "errors" "fmt" "log/slog" - "os" "reflect" "sort" - "strconv" "strings" "sync" "time" @@ -17,6 +15,7 @@ import ( "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" "github.com/ollama/ollama/llm" + "github.com/ollama/ollama/server/envconfig" "golang.org/x/exp/slices" ) @@ -43,35 +42,14 @@ type Scheduler struct { getGpuFn func() gpu.GpuInfoList } -// TODO set this to zero after a release or two, to enable multiple models by default -var loadedMax = 1 // Maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners) -var maxQueuedRequests = 10 // TODO configurable -var numParallel = 1 +var ErrMaxQueue = fmt.Errorf("server busy, please try again. 
maximum pending requests exceeded") func InitScheduler(ctx context.Context) *Scheduler { - maxRunners := os.Getenv("OLLAMA_MAX_LOADED_MODELS") - if maxRunners != "" { - m, err := strconv.Atoi(maxRunners) - if err != nil { - slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err) - } else { - loadedMax = m - } - } - if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" { - p, err := strconv.Atoi(onp) - if err != nil || p <= 0 { - slog.Error("invalid parallel setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err) - } else { - numParallel = p - } - } - sched := &Scheduler{ - pendingReqCh: make(chan *LlmRequest, maxQueuedRequests), - finishedReqCh: make(chan *LlmRequest, maxQueuedRequests), - expiredCh: make(chan *runnerRef, maxQueuedRequests), - unloadedCh: make(chan interface{}, maxQueuedRequests), + pendingReqCh: make(chan *LlmRequest, envconfig.MaxQueuedRequests), + finishedReqCh: make(chan *LlmRequest, envconfig.MaxQueuedRequests), + expiredCh: make(chan *runnerRef, envconfig.MaxQueuedRequests), + unloadedCh: make(chan interface{}, envconfig.MaxQueuedRequests), loaded: make(map[string]*runnerRef), newServerFn: llm.NewLlamaServer, getGpuFn: gpu.GetGPUInfo, @@ -82,6 +60,9 @@ func InitScheduler(ctx context.Context) *Scheduler { // context must be canceled to decrement ref count and release the runner func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) { + // allocate a large enough kv cache for all parallel requests + opts.NumCtx = opts.NumCtx * envconfig.NumParallel + req := &LlmRequest{ ctx: c, model: model, @@ -90,12 +71,11 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, successCh: make(chan *runnerRef), errCh: make(chan error, 1), } - // context split across parallel threads - opts.NumCtx = opts.NumCtx * numParallel + select { case s.pendingReqCh <- req: default: - req.errCh <- fmt.Errorf("server busy, please try again. 
maximum pending requests exceeded") + req.errCh <- ErrMaxQueue } return req.successCh, req.errCh } @@ -134,11 +114,11 @@ func (s *Scheduler) processPending(ctx context.Context) { pending.useLoadedRunner(runner, s.finishedReqCh) break } - } else if loadedMax > 0 && loadedCount >= loadedMax { + } else if envconfig.MaxRunners > 0 && loadedCount >= envconfig.MaxRunners { slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount) - runnerToExpire = s.findRunnerToUnload(pending) + runnerToExpire = s.findRunnerToUnload() } else { - // Either no models are loaded or below loadedMax + // Either no models are loaded or below envconfig.MaxRunners // Get a refreshed GPU list gpus := s.getGpuFn() @@ -149,7 +129,7 @@ func (s *Scheduler) processPending(ctx context.Context) { break } - // If we're CPU only mode, just limit by loadedMax above + // If we're CPU only mode, just limit by envconfig.MaxRunners above // TODO handle system memory exhaustion if (len(gpus) == 1 && gpus[0].Library == "cpu") || pending.opts.NumGPU == 0 { slog.Debug("cpu mode with existing models, loading") @@ -177,7 +157,7 @@ func (s *Scheduler) processPending(ctx context.Context) { s.loadFn(pending, ggml, gpus) break } - runnerToExpire = s.findRunnerToUnload(pending) + runnerToExpire = s.findRunnerToUnload() } if runnerToExpire == nil { @@ -277,9 +257,9 @@ func (s *Scheduler) processCompleted(ctx context.Context) { continue } + s.loadedMu.Lock() slog.Debug("got lock to unload", "model", runner.model) runner.unload() - s.loadedMu.Lock() delete(s.loaded, runner.model) s.loadedMu.Unlock() slog.Debug("runner released", "model", runner.model) @@ -524,7 +504,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu. } // findRunnerToUnload finds a runner to unload to make room for a new model -func (s *Scheduler) findRunnerToUnload(req *LlmRequest) *runnerRef { +func (s *Scheduler) findRunnerToUnload() *runnerRef { s.loadedMu.Lock() runnerList := make([]*runnerRef, 0, len(s.loaded)) for _, r := range s.loaded { diff --git a/server/sched_test.go b/server/sched_test.go index 32a80674..3e47ed02 100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -15,6 +15,7 @@ import ( "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" "github.com/ollama/ollama/llm" + "github.com/ollama/ollama/server/envconfig" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -27,38 +28,14 @@ func init() { func TestInitScheduler(t *testing.T) { ctx, done := context.WithCancel(context.Background()) defer done() - initialMax := loadedMax - initialParallel := numParallel s := InitScheduler(ctx) - require.Equal(t, initialMax, loadedMax) s.loadedMu.Lock() require.NotNil(t, s.loaded) s.loadedMu.Unlock() - - os.Setenv("OLLAMA_MAX_LOADED_MODELS", "blue") - s = InitScheduler(ctx) - require.Equal(t, initialMax, loadedMax) - s.loadedMu.Lock() - require.NotNil(t, s.loaded) - s.loadedMu.Unlock() - - os.Setenv("OLLAMA_MAX_LOADED_MODELS", "0") - s = InitScheduler(ctx) - require.Equal(t, 0, loadedMax) - s.loadedMu.Lock() - require.NotNil(t, s.loaded) - s.loadedMu.Unlock() - - os.Setenv("OLLAMA_NUM_PARALLEL", "blue") - _ = InitScheduler(ctx) - require.Equal(t, initialParallel, numParallel) - os.Setenv("OLLAMA_NUM_PARALLEL", "10") - _ = InitScheduler(ctx) - require.Equal(t, 10, numParallel) } func TestLoad(t *testing.T) { - ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond) + ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond) 
defer done() s := InitScheduler(ctx) var ggml *llm.GGML // value not used in tests @@ -174,7 +151,7 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV } func TestRequests(t *testing.T) { - ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) + ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond) defer done() // Same model, same request @@ -249,7 +226,7 @@ func TestRequests(t *testing.T) { t.Errorf("timeout") } - loadedMax = 1 + envconfig.MaxRunners = 1 s.newServerFn = scenario3a.newServer slog.Info("scenario3a") s.pendingReqCh <- scenario3a.req @@ -268,7 +245,7 @@ func TestRequests(t *testing.T) { require.Len(t, s.loaded, 1) s.loadedMu.Unlock() - loadedMax = 0 + envconfig.MaxRunners = 0 s.newServerFn = scenario3b.newServer slog.Info("scenario3b") s.pendingReqCh <- scenario3b.req @@ -329,7 +306,7 @@ func TestRequests(t *testing.T) { } func TestGetRunner(t *testing.T) { - ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond) + ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) defer done() // Same model, same request @@ -339,7 +316,7 @@ func TestGetRunner(t *testing.T) { scenario1b.req.sessionDuration = 0 scenario1c := newScenario(t, ctx, "ollama-model-1c", 10) scenario1c.req.sessionDuration = 0 - maxQueuedRequests = 1 + envconfig.MaxQueuedRequests = 1 s := InitScheduler(ctx) s.getGpuFn = func() gpu.GpuInfoList { g := gpu.GpuInfo{Library: "metal"} @@ -391,7 +368,7 @@ func TestGetRunner(t *testing.T) { // TODO - add one scenario that triggers the bogus finished event with positive ref count func TestPrematureExpired(t *testing.T) { - ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) + ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond) defer done() // Same model, same request @@ -436,7 +413,7 @@ func TestPrematureExpired(t *testing.T) { } func TestUseLoadedRunner(t *testing.T) { - ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond) + ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) req := &LlmRequest{ ctx: ctx, opts: api.DefaultOptions(), @@ -461,7 +438,7 @@ func TestUseLoadedRunner(t *testing.T) { } func TestUpdateFreeSpace(t *testing.T) { - ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond) + ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) defer done() gpus := gpu.GpuInfoList{ { @@ -494,12 +471,9 @@ func TestUpdateFreeSpace(t *testing.T) { } func TestFindRunnerToUnload(t *testing.T) { - ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond) + ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) defer done() - req := &LlmRequest{ - ctx: ctx, - opts: api.DefaultOptions(), - } + r1 := &runnerRef{refCount: 1, sessionDuration: 1} r2 := &runnerRef{sessionDuration: 2} @@ -509,16 +483,16 @@ func TestFindRunnerToUnload(t *testing.T) { s.loaded["b"] = r2 s.loadedMu.Unlock() - resp := s.findRunnerToUnload(req) + resp := s.findRunnerToUnload() require.Equal(t, r2, resp) r2.refCount = 1 - resp = s.findRunnerToUnload(req) + resp = s.findRunnerToUnload() require.Equal(t, r1, resp) } func TestNeedsReload(t *testing.T) { - ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond) + ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) defer done() llm := &mockLlm{} @@ -562,7 +536,7 @@ func TestNeedsReload(t *testing.T) { } func 
TestUnloadAllRunners(t *testing.T) { - ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond) + ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) defer done() llm1 := &mockLlm{} diff --git a/parser/parser.go b/types/model/file.go similarity index 86% rename from parser/parser.go rename to types/model/file.go index 9d1f3388..c614fd32 100644 --- a/parser/parser.go +++ b/types/model/file.go @@ -1,4 +1,4 @@ -package parser +package model import ( "bufio" @@ -10,11 +10,41 @@ import ( "strings" ) +type File struct { + Commands []Command +} + +func (f File) String() string { + var sb strings.Builder + for _, cmd := range f.Commands { + fmt.Fprintln(&sb, cmd.String()) + } + + return sb.String() +} + type Command struct { Name string Args string } +func (c Command) String() string { + var sb strings.Builder + switch c.Name { + case "model": + fmt.Fprintf(&sb, "FROM %s", c.Args) + case "license", "template", "system", "adapter": + fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args)) + case "message": + role, message, _ := strings.Cut(c.Args, ": ") + fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message)) + default: + fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args)) + } + + return sb.String() +} + type state int const ( @@ -32,38 +62,14 @@ var ( errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"") ) -func Format(cmds []Command) string { - var sb strings.Builder - for _, cmd := range cmds { - name := cmd.Name - args := cmd.Args - - switch cmd.Name { - case "model": - name = "from" - args = cmd.Args - case "license", "template", "system", "adapter": - args = quote(args) - case "message": - role, message, _ := strings.Cut(cmd.Args, ": ") - args = role + " " + quote(message) - default: - name = "parameter" - args = cmd.Name + " " + quote(cmd.Args) - } - - fmt.Fprintln(&sb, strings.ToUpper(name), args) - } - - return sb.String() -} - -func Parse(r io.Reader) (cmds []Command, err error) { +func ParseFile(r io.Reader) (*File, error) { var cmd Command var curr state var b bytes.Buffer var role string + var f File + br := bufio.NewReader(r) for { r, _, err := br.ReadRune() @@ -128,7 +134,7 @@ func Parse(r io.Reader) (cmds []Command, err error) { } cmd.Args = s - cmds = append(cmds, cmd) + f.Commands = append(f.Commands, cmd) } b.Reset() @@ -157,14 +163,14 @@ func Parse(r io.Reader) (cmds []Command, err error) { } cmd.Args = s - cmds = append(cmds, cmd) + f.Commands = append(f.Commands, cmd) default: return nil, io.ErrUnexpectedEOF } - for _, cmd := range cmds { + for _, cmd := range f.Commands { if cmd.Name == "model" { - return cmds, nil + return &f, nil } } diff --git a/parser/parser_test.go b/types/model/file_test.go similarity index 80% rename from parser/parser_test.go rename to types/model/file_test.go index a28205aa..d51c8d70 100644 --- a/parser/parser_test.go +++ b/types/model/file_test.go @@ -1,4 +1,4 @@ -package parser +package model import ( "bytes" @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/assert" ) -func TestParser(t *testing.T) { +func TestParseFileFile(t *testing.T) { input := ` FROM model1 ADAPTER adapter1 @@ -22,8 +22,8 @@ TEMPLATE template1 reader := strings.NewReader(input) - commands, err := Parse(reader) - assert.Nil(t, err) + modelfile, err := ParseFile(reader) + assert.NoError(t, err) expectedCommands := []Command{ {Name: "model", Args: "model1"}, @@ -34,10 +34,10 @@ TEMPLATE template1 {Name: "template", Args: 
"template1"}, } - assert.Equal(t, expectedCommands, commands) + assert.Equal(t, expectedCommands, modelfile.Commands) } -func TestParserFrom(t *testing.T) { +func TestParseFileFrom(t *testing.T) { var cases = []struct { input string expected []Command @@ -85,14 +85,16 @@ func TestParserFrom(t *testing.T) { for _, c := range cases { t.Run("", func(t *testing.T) { - commands, err := Parse(strings.NewReader(c.input)) + modelfile, err := ParseFile(strings.NewReader(c.input)) assert.ErrorIs(t, err, c.err) - assert.Equal(t, c.expected, commands) + if modelfile != nil { + assert.Equal(t, c.expected, modelfile.Commands) + } }) } } -func TestParserParametersMissingValue(t *testing.T) { +func TestParseFileParametersMissingValue(t *testing.T) { input := ` FROM foo PARAMETER param1 @@ -100,21 +102,21 @@ PARAMETER param1 reader := strings.NewReader(input) - _, err := Parse(reader) + _, err := ParseFile(reader) assert.ErrorIs(t, err, io.ErrUnexpectedEOF) } -func TestParserBadCommand(t *testing.T) { +func TestParseFileBadCommand(t *testing.T) { input := ` FROM foo BADCOMMAND param1 value1 ` - _, err := Parse(strings.NewReader(input)) + _, err := ParseFile(strings.NewReader(input)) assert.ErrorIs(t, err, errInvalidCommand) } -func TestParserMessages(t *testing.T) { +func TestParseFileMessages(t *testing.T) { var cases = []struct { input string expected []Command @@ -123,34 +125,34 @@ func TestParserMessages(t *testing.T) { { ` FROM foo -MESSAGE system You are a Parser. Always Parse things. +MESSAGE system You are a file parser. Always parse things. `, []Command{ {Name: "model", Args: "foo"}, - {Name: "message", Args: "system: You are a Parser. Always Parse things."}, + {Name: "message", Args: "system: You are a file parser. Always parse things."}, }, nil, }, { ` FROM foo -MESSAGE system You are a Parser. Always Parse things.`, +MESSAGE system You are a file parser. Always parse things.`, []Command{ {Name: "model", Args: "foo"}, - {Name: "message", Args: "system: You are a Parser. Always Parse things."}, + {Name: "message", Args: "system: You are a file parser. Always parse things."}, }, nil, }, { ` FROM foo -MESSAGE system You are a Parser. Always Parse things. +MESSAGE system You are a file parser. Always parse things. MESSAGE user Hey there! MESSAGE assistant Hello, I want to parse all the things! `, []Command{ {Name: "model", Args: "foo"}, - {Name: "message", Args: "system: You are a Parser. Always Parse things."}, + {Name: "message", Args: "system: You are a file parser. Always parse things."}, {Name: "message", Args: "user: Hey there!"}, {Name: "message", Args: "assistant: Hello, I want to parse all the things!"}, }, @@ -160,12 +162,12 @@ MESSAGE assistant Hello, I want to parse all the things! ` FROM foo MESSAGE system """ -You are a multiline Parser. Always Parse things. +You are a multiline file parser. Always parse things. """ `, []Command{ {Name: "model", Args: "foo"}, - {Name: "message", Args: "system: \nYou are a multiline Parser. Always Parse things.\n"}, + {Name: "message", Args: "system: \nYou are a multiline file parser. 
Always parse things.\n"}, }, nil, }, @@ -196,14 +198,16 @@ MESSAGE system`, for _, c := range cases { t.Run("", func(t *testing.T) { - commands, err := Parse(strings.NewReader(c.input)) + modelfile, err := ParseFile(strings.NewReader(c.input)) assert.ErrorIs(t, err, c.err) - assert.Equal(t, c.expected, commands) + if modelfile != nil { + assert.Equal(t, c.expected, modelfile.Commands) + } }) } } -func TestParserQuoted(t *testing.T) { +func TestParseFileQuoted(t *testing.T) { var cases = []struct { multiline string expected []Command @@ -348,14 +352,16 @@ TEMPLATE """ for _, c := range cases { t.Run("", func(t *testing.T) { - commands, err := Parse(strings.NewReader(c.multiline)) + modelfile, err := ParseFile(strings.NewReader(c.multiline)) assert.ErrorIs(t, err, c.err) - assert.Equal(t, c.expected, commands) + if modelfile != nil { + assert.Equal(t, c.expected, modelfile.Commands) + } }) } } -func TestParserParameters(t *testing.T) { +func TestParseFileParameters(t *testing.T) { var cases = map[string]struct { name, value string }{ @@ -404,18 +410,18 @@ func TestParserParameters(t *testing.T) { var b bytes.Buffer fmt.Fprintln(&b, "FROM foo") fmt.Fprintln(&b, "PARAMETER", k) - commands, err := Parse(&b) - assert.Nil(t, err) + modelfile, err := ParseFile(&b) + assert.NoError(t, err) assert.Equal(t, []Command{ {Name: "model", Args: "foo"}, {Name: v.name, Args: v.value}, - }, commands) + }, modelfile.Commands) }) } } -func TestParserComments(t *testing.T) { +func TestParseFileComments(t *testing.T) { var cases = []struct { input string expected []Command @@ -433,14 +439,14 @@ FROM foo for _, c := range cases { t.Run("", func(t *testing.T) { - commands, err := Parse(strings.NewReader(c.input)) - assert.Nil(t, err) - assert.Equal(t, c.expected, commands) + modelfile, err := ParseFile(strings.NewReader(c.input)) + assert.NoError(t, err) + assert.Equal(t, c.expected, modelfile.Commands) }) } } -func TestParseFormatParse(t *testing.T) { +func TestParseFileFormatParseFile(t *testing.T) { var cases = []string{ ` FROM foo @@ -449,7 +455,7 @@ LICENSE MIT PARAMETER param1 value1 PARAMETER param2 value2 TEMPLATE template1 -MESSAGE system You are a Parser. Always Parse things. +MESSAGE system You are a file parser. Always parse things. MESSAGE user Hey there! MESSAGE assistant Hello, I want to parse all the things! `, @@ -488,13 +494,13 @@ MESSAGE assistant Hello, I want to parse all the things! for _, c := range cases { t.Run("", func(t *testing.T) { - commands, err := Parse(strings.NewReader(c)) + modelfile, err := ParseFile(strings.NewReader(c)) assert.NoError(t, err) - commands2, err := Parse(strings.NewReader(Format(commands))) + modelfile2, err := ParseFile(strings.NewReader(modelfile.String())) assert.NoError(t, err) - assert.Equal(t, commands, commands2) + assert.Equal(t, modelfile, modelfile2) }) } diff --git a/types/model/name.go b/types/model/name.go index cb890b3a..fbb30fd4 100644 --- a/types/model/name.go +++ b/types/model/name.go @@ -161,7 +161,7 @@ func ParseNameBare(s string) Name { } scheme, host, ok := strings.Cut(s, "://") - if ! ok { + if !ok { host = scheme } n.Host = host
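
Taken together, the changes above replace scattered os.Getenv calls with the centralized envconfig package. LoadConfig repeats one pattern for OLLAMA_MAX_VRAM, OLLAMA_NUM_PARALLEL, OLLAMA_MAX_LOADED_MODELS and OLLAMA_MAX_QUEUE: read the variable, parse it, and keep the default (while logging) when the value is missing or invalid. A minimal sketch of that pattern as a standalone helper; the helper name and log fields are illustrative, not part of envconfig, and the quote-trimming assumes clean() behaves like the old OLLAMA_ORIGINS handling in routes.go.

package main

import (
	"log/slog"
	"os"
	"strconv"
	"strings"
)

// positiveIntEnv is a hypothetical helper mirroring the parse-or-default
// pattern used in envconfig.LoadConfig.
func positiveIntEnv(key string, def int) int {
	raw := strings.Trim(os.Getenv(key), "\"'")
	if raw == "" {
		return def
	}
	v, err := strconv.Atoi(raw)
	if err != nil || v <= 0 {
		slog.Error("invalid setting, using default", key, raw, "error", err)
		return def
	}
	return v
}

func main() {
	os.Setenv("OLLAMA_NUM_PARALLEL", "4")
	slog.Info("parsed", "OLLAMA_NUM_PARALLEL", positiveIntEnv("OLLAMA_NUM_PARALLEL", 1))
}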
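
The new envconfig/config_test.go pins down how OLLAMA_DEBUG is interpreted: unset or "false" leaves Debug false, while "1" turns it on. A sketch consistent with those three asserted cases, assuming strconv.ParseBool semantics; the actual Debug handling in LoadConfig is not shown in this patch, so treat this as an assumption.

package main

import (
	"fmt"
	"os"
	"strconv"
)

// boolEnv is a hypothetical reading of the OLLAMA_DEBUG handling that
// matches the cases asserted in envconfig/config_test.go.
func boolEnv(key string) bool {
	v, err := strconv.ParseBool(os.Getenv(key))
	return err == nil && v
}

func main() {
	for _, val := range []string{"", "false", "1"} {
		os.Setenv("OLLAMA_DEBUG", val)
		fmt.Printf("%q => %v\n", val, boolEnv("OLLAMA_DEBUG")) // false, false, true
	}
}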
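
CORS configuration also moves into envconfig: AllowOrigins is seeded from OLLAMA_ORIGINS, then http and https forms (with and without a wildcard port) are appended for each default host, and GenerateRoutes simply assigns the result with AllowWildcard enabled. A small sketch of the resulting expansion; "example.com" is a stand-in value.

package main

import (
	"fmt"
	"strings"
)

func main() {
	defaults := []string{"localhost", "127.0.0.1", "0.0.0.0"}
	user := "https://example.com" // stand-in for OLLAMA_ORIGINS

	origins := strings.Split(user, ",")
	for _, host := range defaults {
		origins = append(origins,
			fmt.Sprintf("http://%s", host),
			fmt.Sprintf("https://%s", host),
			fmt.Sprintf("http://%s:*", host), // any port; relies on the cors AllowWildcard option
			fmt.Sprintf("https://%s:*", host),
		)
	}
	fmt.Println(len(origins), origins) // 13 entries: the user origin plus 4 per default host
}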
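
GetBlobsPath now rejects anything that is not a sha256 digest before joining it into the blobs directory, which is what blocks traversal inputs like "../sha256-..." in the new modelpath_test.go cases. A sketch of the same check with the pattern compiled once at package scope; this is an alternative arrangement for illustration, not the patch's code verbatim.

package main

import (
	"errors"
	"fmt"
	"path/filepath"
	"regexp"
	"strings"
)

var (
	errInvalidDigest = errors.New("invalid digest format")
	// sha256 digest, accepting either "sha256:<hex>" or "sha256-<hex>".
	digestRE = regexp.MustCompile(`^sha256[:-][0-9a-fA-F]{64}$`)
)

func blobPath(dir, digest string) (string, error) {
	if digest != "" && !digestRE.MatchString(digest) {
		return "", errInvalidDigest
	}
	// normalize ":" to "-" to match the on-disk layout
	digest = strings.ReplaceAll(digest, ":", "-")
	return filepath.Join(dir, "blobs", digest), nil
}

func main() {
	p, err := blobPath("/models", "sha256:"+strings.Repeat("a", 64))
	fmt.Println(p, err)
	_, err = blobPath("/models", "../escape")
	fmt.Println(err) // invalid digest format
}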
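
CreateModelHandler replaces the hand-rolled Model/Name fallback with cmp.Or (Go 1.22), which returns the first of its arguments that is not the zero value. A tiny illustration:

package main

import (
	"cmp"
	"fmt"
)

func main() {
	fmt.Println(cmp.Or("", "legacy-name"))     // "legacy-name"
	fmt.Println(cmp.Or("new-model", "legacy")) // "new-model"
	fmt.Println(cmp.Or("", "") == "")          // true: zero value when all arguments are zero
}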
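
The generate, embeddings and chat handlers now funnel scheduler errors through handleErrorResponse, which maps context.Canceled to 499, ErrMaxQueue to 503, and everything else to 500. A sketch of the same mapping as a plain status function, without the gin plumbing; the function name here is illustrative.

package main

import (
	"context"
	"errors"
	"fmt"
	"net/http"
)

var errMaxQueue = errors.New("server busy, please try again. maximum pending requests exceeded")

// statusFor mirrors the mapping in handleErrorResponse.
func statusFor(err error) int {
	switch {
	case errors.Is(err, context.Canceled):
		return 499 // nginx-style "client closed request"
	case errors.Is(err, errMaxQueue):
		return http.StatusServiceUnavailable // 503
	default:
		return http.StatusInternalServerError // 500
	}
}

func main() {
	fmt.Println(statusFor(context.Canceled), statusFor(errMaxQueue), statusFor(errors.New("boom")))
	// 499 503 500
}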
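
In the scheduler, GetRunner first scales NumCtx by NumParallel so the runner allocates a KV cache large enough for all parallel slots (for example 2048 * 4 = 8192 tokens), then enqueues with a non-blocking send: once pendingReqCh already holds MaxQueuedRequests entries, the request fails immediately with ErrMaxQueue instead of blocking. A minimal sketch of that admission pattern under assumed stand-in types.

package main

import (
	"errors"
	"fmt"
)

var errMaxQueue = errors.New("server busy, please try again. maximum pending requests exceeded")

type request struct{ model string }

// enqueue admits a request only while the buffered channel has room,
// mirroring the select/default in Scheduler.GetRunner.
func enqueue(pending chan *request, r *request) error {
	select {
	case pending <- r:
		return nil
	default:
		return errMaxQueue
	}
}

func main() {
	pending := make(chan *request, 2) // stand-in for envconfig.MaxQueuedRequests
	for i := 0; i < 3; i++ {
		err := enqueue(pending, &request{model: fmt.Sprintf("m%d", i)})
		fmt.Println(i, err) // the third enqueue reports the queue-full error
	}
}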
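
processCompleted now takes loadedMu before calling runner.unload() and deleting the entry, so the unload and the map removal happen in one critical section and no other goroutine can observe a runner that has been unloaded but is still listed as loaded. A generic sketch of that lock-before-mutate pattern; the types are stand-ins, not the scheduler's.

package main

import (
	"fmt"
	"sync"
)

type runner struct{ model string }

func (r *runner) unload() { fmt.Println("unloading", r.model) }

type registry struct {
	mu     sync.Mutex
	loaded map[string]*runner
}

// evict unloads and removes a runner under the same lock that guards the map.
func (g *registry) evict(model string) {
	g.mu.Lock()
	defer g.mu.Unlock()
	if r, ok := g.loaded[model]; ok {
		r.unload()
		delete(g.loaded, model)
	}
}

func main() {
	g := &registry{loaded: map[string]*runner{"llama2": {model: "llama2"}}}
	g.evict("llama2")
	fmt.Println(len(g.loaded)) // 0
}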
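
With the parser folded into types/model, a Modelfile is parsed with model.ParseFile and rendered back with File.String(); the new TestParseFileFormatParseFile asserts that this round-trip is stable. A small usage sketch, using the import path as it appears elsewhere in the patch and a made-up Modelfile.

package main

import (
	"fmt"
	"strings"

	"github.com/ollama/ollama/types/model"
)

func main() {
	src := `FROM llama2
PARAMETER temperature 0.7
SYSTEM You are a helpful assistant.
`
	f, err := model.ParseFile(strings.NewReader(src))
	if err != nil {
		panic(err)
	}
	for _, c := range f.Commands {
		fmt.Printf("%s => %q\n", c.Name, c.Args) // model, temperature, system
	}
	// Rendering the file and re-parsing it yields the same commands.
	fmt.Print(f.String())
}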
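
Finally, the `!ok` fix in ParseNameBare leans on the semantics of strings.Cut: when the separator is absent, Cut returns the whole input as the first value and ok=false, so the string that landed in scheme is really the host. A tiny illustration with a registry-style name:

package main

import (
	"fmt"
	"strings"
)

func main() {
	scheme, host, ok := strings.Cut("https://registry.ollama.ai/library/llama2", "://")
	fmt.Println(scheme, host, ok) // https registry.ollama.ai/library/llama2 true

	scheme, host, ok = strings.Cut("registry.ollama.ai/library/llama2", "://")
	fmt.Println(ok, host == "") // false true: no separator, so "host" comes back empty
	if !ok {
		host = scheme // the same fallback ParseNameBare now applies
	}
	fmt.Println(host) // registry.ollama.ai/library/llama2
}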