diff --git a/README.md b/README.md
index a1a9c288..8d7659c5 100644
--- a/README.md
+++ b/README.md
@@ -370,12 +370,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
-- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
- [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support)
- [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
- [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
- [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
+- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
+- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use Ollama as a copilot, like GitHub Copilot)
- [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
@@ -384,4 +385,5 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
### Supported backends
-- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
\ No newline at end of file
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
+
diff --git a/app/lifecycle/logging.go b/app/lifecycle/logging.go
index 98df9b41..4be90648 100644
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@@ -5,12 +5,14 @@ import (
"log/slog"
"os"
"path/filepath"
+
+ "github.com/ollama/ollama/server/envconfig"
)
func InitLogging() {
level := slog.LevelInfo
- if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
+ if envconfig.Debug {
level = slog.LevelDebug
}
diff --git a/app/lifecycle/updater_windows.go b/app/lifecycle/updater_windows.go
index f26c43c9..4053671a 100644
--- a/app/lifecycle/updater_windows.go
+++ b/app/lifecycle/updater_windows.go
@@ -31,16 +31,13 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
"/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd
"/FORCECLOSEAPPLICATIONS", // Force close the tray app - might be needed
}
- // When we're not in debug mode, make the upgrade as quiet as possible (no GUI, no prompts)
- // TODO - temporarily disable since we're pinning in debug mode for the preview
- // if debug := os.Getenv("OLLAMA_DEBUG"); debug == "" {
+ // make the upgrade as quiet as possible (no GUI, no prompts)
installArgs = append(installArgs,
"/SP", // Skip the "This will install... Do you wish to continue" prompt
"/SUPPRESSMSGBOXES",
"/SILENT",
"/VERYSILENT",
)
- // }
// Safeguard in case we have requests in flight that need to drain...
slog.Info("Waiting for server to shutdown")
diff --git a/app/tray/wintray/menus.go b/app/tray/wintray/menus.go
index 74defa67..9cb3b893 100644
--- a/app/tray/wintray/menus.go
+++ b/app/tray/wintray/menus.go
@@ -1,71 +1,71 @@
-//go:build windows
-
-package wintray
-
-import (
- "fmt"
- "log/slog"
- "unsafe"
-
- "golang.org/x/sys/windows"
-)
-
-const (
- updatAvailableMenuID = 1
- updateMenuID = updatAvailableMenuID + 1
- separatorMenuID = updateMenuID + 1
- diagLogsMenuID = separatorMenuID + 1
- diagSeparatorMenuID = diagLogsMenuID + 1
- quitMenuID = diagSeparatorMenuID + 1
-)
-
-func (t *winTray) initMenus() error {
- if err := t.addOrUpdateMenuItem(diagLogsMenuID, 0, diagLogsMenuTitle, false); err != nil {
- return fmt.Errorf("unable to create menu entries %w\n", err)
- }
- if err := t.addSeparatorMenuItem(diagSeparatorMenuID, 0); err != nil {
- return fmt.Errorf("unable to create menu entries %w", err)
- }
- if err := t.addOrUpdateMenuItem(quitMenuID, 0, quitMenuTitle, false); err != nil {
- return fmt.Errorf("unable to create menu entries %w\n", err)
- }
- return nil
-}
-
-func (t *winTray) UpdateAvailable(ver string) error {
- if !t.updateNotified {
- slog.Debug("updating menu and sending notification for new update")
- if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
- return fmt.Errorf("unable to create menu entries %w", err)
- }
- if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
- return fmt.Errorf("unable to create menu entries %w", err)
- }
- if err := t.addSeparatorMenuItem(separatorMenuID, 0); err != nil {
- return fmt.Errorf("unable to create menu entries %w", err)
- }
- iconFilePath, err := iconBytesToFilePath(wt.updateIcon)
- if err != nil {
- return fmt.Errorf("unable to write icon data to temp file: %w", err)
- }
- if err := wt.setIcon(iconFilePath); err != nil {
- return fmt.Errorf("unable to set icon: %w", err)
- }
- t.updateNotified = true
-
- t.pendingUpdate = true
- // Now pop up the notification
- t.muNID.Lock()
- defer t.muNID.Unlock()
- copy(t.nid.InfoTitle[:], windows.StringToUTF16(updateTitle))
- copy(t.nid.Info[:], windows.StringToUTF16(fmt.Sprintf(updateMessage, ver)))
- t.nid.Flags |= NIF_INFO
- t.nid.Timeout = 10
- t.nid.Size = uint32(unsafe.Sizeof(*wt.nid))
- err = t.nid.modify()
- if err != nil {
- return err
- }
- }
- return nil
-}
+//go:build windows
+
+package wintray
+
+import (
+ "fmt"
+ "log/slog"
+ "unsafe"
+
+ "golang.org/x/sys/windows"
+)
+
+const (
+ updatAvailableMenuID = 1
+ updateMenuID = updatAvailableMenuID + 1
+ separatorMenuID = updateMenuID + 1
+ diagLogsMenuID = separatorMenuID + 1
+ diagSeparatorMenuID = diagLogsMenuID + 1
+ quitMenuID = diagSeparatorMenuID + 1
+)
+
+func (t *winTray) initMenus() error {
+ if err := t.addOrUpdateMenuItem(diagLogsMenuID, 0, diagLogsMenuTitle, false); err != nil {
+ return fmt.Errorf("unable to create menu entries %w\n", err)
+ }
+ if err := t.addSeparatorMenuItem(diagSeparatorMenuID, 0); err != nil {
+ return fmt.Errorf("unable to create menu entries %w", err)
+ }
+ if err := t.addOrUpdateMenuItem(quitMenuID, 0, quitMenuTitle, false); err != nil {
+ return fmt.Errorf("unable to create menu entries %w\n", err)
+ }
+ return nil
+}
+
+func (t *winTray) UpdateAvailable(ver string) error {
+ if !t.updateNotified {
+ slog.Debug("updating menu and sending notification for new update")
+ if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
+ return fmt.Errorf("unable to create menu entries %w", err)
+ }
+ if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
+ return fmt.Errorf("unable to create menu entries %w", err)
+ }
+ if err := t.addSeparatorMenuItem(separatorMenuID, 0); err != nil {
+ return fmt.Errorf("unable to create menu entries %w", err)
+ }
+ iconFilePath, err := iconBytesToFilePath(wt.updateIcon)
+ if err != nil {
+ return fmt.Errorf("unable to write icon data to temp file: %w", err)
+ }
+ if err := wt.setIcon(iconFilePath); err != nil {
+ return fmt.Errorf("unable to set icon: %w", err)
+ }
+ t.updateNotified = true
+
+ t.pendingUpdate = true
+ // Now pop up the notification
+ t.muNID.Lock()
+ defer t.muNID.Unlock()
+ copy(t.nid.InfoTitle[:], windows.StringToUTF16(updateTitle))
+ copy(t.nid.Info[:], windows.StringToUTF16(fmt.Sprintf(updateMessage, ver)))
+ t.nid.Flags |= NIF_INFO
+ t.nid.Timeout = 10
+ t.nid.Size = uint32(unsafe.Sizeof(*wt.nid))
+ err = t.nid.modify()
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
diff --git a/cmd/cmd.go b/cmd/cmd.go
index afae9d90..faac424c 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -34,7 +34,6 @@ import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/format"
- "github.com/ollama/ollama/parser"
"github.com/ollama/ollama/progress"
"github.com/ollama/ollama/server"
"github.com/ollama/ollama/types/errtypes"
@@ -57,13 +56,13 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
p := progress.NewProgress(os.Stderr)
defer p.Stop()
- modelfile, err := os.Open(filename)
+ f, err := os.Open(filename)
if err != nil {
return err
}
- defer modelfile.Close()
+ defer f.Close()
- commands, err := parser.Parse(modelfile)
+ modelfile, err := model.ParseFile(f)
if err != nil {
return err
}
@@ -77,10 +76,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
spinner := progress.NewSpinner(status)
p.Add(status, spinner)
- for i := range commands {
- switch commands[i].Name {
+ for i := range modelfile.Commands {
+ switch modelfile.Commands[i].Name {
case "model", "adapter":
- path := commands[i].Args
+ path := modelfile.Commands[i].Args
if path == "~" {
path = home
} else if strings.HasPrefix(path, "~/") {
@@ -92,7 +91,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
fi, err := os.Stat(path)
- if errors.Is(err, os.ErrNotExist) && commands[i].Name == "model" {
+ if errors.Is(err, os.ErrNotExist) && modelfile.Commands[i].Name == "model" {
continue
} else if err != nil {
return err
@@ -115,7 +114,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}
- commands[i].Args = "@"+digest
+ modelfile.Commands[i].Args = "@" + digest
}
}
@@ -145,7 +144,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
quantization, _ := cmd.Flags().GetString("quantization")
- request := api.CreateRequest{Name: args[0], Modelfile: parser.Format(commands), Quantization: quantization}
+ request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantization: quantization}
if err := client.Create(cmd.Context(), &request, fn); err != nil {
return err
}
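
For reference, here is a minimal, hypothetical sketch of the new Modelfile parsing path this hunk switches to. It assumes `model.ParseFile` lives in the same package as the `model.File` and `model.Command` types used in the `server/images.go` hunk further down; the import path below is inferred from that hunk and may differ.

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/ollama/ollama/types/model" // assumed path, inferred from the server/images.go hunk
)

func main() {
	f, err := os.Open("Modelfile")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// ParseFile replaces the old parser.Parse call; the parsed commands and
	// their serialized form now travel together on one value.
	modelfile, err := model.ParseFile(f)
	if err != nil {
		log.Fatal(err)
	}

	for _, c := range modelfile.Commands {
		fmt.Println(c.Name, c.Args) // e.g. "model llama3"
	}

	// String() replaces the old parser.Format(commands) when building the create request.
	fmt.Println(modelfile.String())
}
```

Compared with the old `parser.Parse`/`parser.Format` pair, the commands and their serialized form are carried by a single value, which is why `CreateHandler` above can pass `modelfile.String()` directly into `api.CreateRequest`.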
diff --git a/convert/safetensors.go b/convert/safetensors.go
index fb8aa019..69424c4d 100644
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@@ -53,7 +53,7 @@ func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Ten
var err error
t, offset, err = m.readTensors(f, offset, params)
if err != nil {
- slog.Error("%v", err)
+ slog.Error(err.Error())
return nil, err
}
tensors = append(tensors, t...)
@@ -122,7 +122,7 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
ggufName, err := m.GetLayerName(k)
if err != nil {
- slog.Error("%v", err)
+ slog.Error(err.Error())
return nil, 0, err
}
diff --git a/convert/torch.go b/convert/torch.go
index fd237505..92c58872 100644
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -74,7 +74,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
ggufName, err := tf.GetLayerName(k.(string))
if err != nil {
- slog.Error("%v", err)
+ slog.Error(err.Error())
return nil, err
}
slog.Debug(fmt.Sprintf("finding name for '%s' -> '%s'", k.(string), ggufName))
diff --git a/docs/api.md b/docs/api.md
index 5fc946ce..e79b6f5a 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -17,7 +17,7 @@
### Model names
-Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
+Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
### Durations
@@ -66,7 +66,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur
```shell
curl http://localhost:11434/api/generate -d '{
- "model": "llama2",
+ "model": "llama3",
"prompt": "Why is the sky blue?"
}'
```
@@ -77,7 +77,7 @@ A stream of JSON objects is returned:
```json
{
- "model": "llama2",
+ "model": "llama3",
"created_at": "2023-08-04T08:52:19.385406455-07:00",
"response": "The",
"done": false
@@ -99,7 +99,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
```json
{
- "model": "llama2",
+ "model": "llama3",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "",
"done": true,
@@ -121,7 +121,7 @@ A response can be received in one reply when streaming is off.
```shell
curl http://localhost:11434/api/generate -d '{
- "model": "llama2",
+ "model": "llama3",
"prompt": "Why is the sky blue?",
"stream": false
}'
@@ -133,7 +133,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
```json
{
- "model": "llama2",
+ "model": "llama3",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"done": true,
@@ -155,7 +155,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
```shell
curl http://localhost:11434/api/generate -d '{
- "model": "llama2",
+ "model": "llama3",
"prompt": "What color is the sky at different times of the day? Respond using JSON",
"format": "json",
"stream": false
@@ -166,7 +166,7 @@ curl http://localhost:11434/api/generate -d '{
```json
{
- "model": "llama2",
+ "model": "llama3",
"created_at": "2023-11-09T21:07:55.186497Z",
"response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
"done": true,
@@ -289,7 +289,7 @@ If you want to set custom options for the model at runtime rather than in the Mo
```shell
curl http://localhost:11434/api/generate -d '{
- "model": "llama2",
+ "model": "llama3",
"prompt": "Why is the sky blue?",
"stream": false,
"options": {
@@ -332,7 +332,7 @@ curl http://localhost:11434/api/generate -d '{
```json
{
- "model": "llama2",
+ "model": "llama3",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"done": true,
@@ -354,7 +354,7 @@ If an empty prompt is provided, the model will be loaded into memory.
```shell
curl http://localhost:11434/api/generate -d '{
- "model": "llama2"
+ "model": "llama3"
}'
```
@@ -364,7 +364,7 @@ A single JSON object is returned:
```json
{
- "model": "llama2",
+ "model": "llama3",
"created_at": "2023-12-18T19:52:07.071755Z",
"response": "",
"done": true
@@ -407,7 +407,7 @@ Send a chat message with a streaming response.
```shell
curl http://localhost:11434/api/chat -d '{
- "model": "llama2",
+ "model": "llama3",
"messages": [
{
"role": "user",
@@ -423,7 +423,7 @@ A stream of JSON objects is returned:
```json
{
- "model": "llama2",
+ "model": "llama3",
"created_at": "2023-08-04T08:52:19.385406455-07:00",
"message": {
"role": "assistant",
@@ -438,7 +438,7 @@ Final response:
```json
{
- "model": "llama2",
+ "model": "llama3",
"created_at": "2023-08-04T19:22:45.499127Z",
"done": true,
"total_duration": 4883583458,
@@ -456,7 +456,7 @@ Final response:
```shell
curl http://localhost:11434/api/chat -d '{
- "model": "llama2",
+ "model": "llama3",
"messages": [
{
"role": "user",
@@ -471,7 +471,7 @@ curl http://localhost:11434/api/chat -d '{
```json
{
- "model": "registry.ollama.ai/library/llama2:latest",
+ "model": "registry.ollama.ai/library/llama3:latest",
"created_at": "2023-12-12T14:13:43.416799Z",
"message": {
"role": "assistant",
@@ -495,7 +495,7 @@ Send a chat message with a conversation history. You can use this same approach
```shell
curl http://localhost:11434/api/chat -d '{
- "model": "llama2",
+ "model": "llama3",
"messages": [
{
"role": "user",
@@ -519,7 +519,7 @@ A stream of JSON objects is returned:
```json
{
- "model": "llama2",
+ "model": "llama3",
"created_at": "2023-08-04T08:52:19.385406455-07:00",
"message": {
"role": "assistant",
@@ -533,7 +533,7 @@ Final response:
```json
{
- "model": "llama2",
+ "model": "llama3",
"created_at": "2023-08-04T19:22:45.499127Z",
"done": true,
"total_duration": 8113331500,
@@ -591,7 +591,7 @@ curl http://localhost:11434/api/chat -d '{
```shell
curl http://localhost:11434/api/chat -d '{
- "model": "llama2",
+ "model": "llama3",
"messages": [
{
"role": "user",
@@ -609,7 +609,7 @@ curl http://localhost:11434/api/chat -d '{
```json
{
- "model": "registry.ollama.ai/library/llama2:latest",
+ "model": "registry.ollama.ai/library/llama3:latest",
"created_at": "2023-12-12T14:13:43.416799Z",
"message": {
"role": "assistant",
@@ -651,7 +651,7 @@ Create a new model from a `Modelfile`.
```shell
curl http://localhost:11434/api/create -d '{
"name": "mario",
- "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
+ "modelfile": "FROM llama3\nSYSTEM You are mario from Super Mario Bros."
}'
```
@@ -758,7 +758,7 @@ A single JSON object will be returned.
}
},
{
- "name": "llama2:latest",
+ "name": "llama3:latest",
"modified_at": "2023-12-07T09:32:18.757212583-08:00",
"size": 3825819519,
"digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
@@ -792,7 +792,7 @@ Show information about a model including details, modelfile, template, parameter
```shell
curl http://localhost:11434/api/show -d '{
- "name": "llama2"
+ "name": "llama3"
}'
```
@@ -827,8 +827,8 @@ Copy a model. Creates a model with another name from an existing model.
```shell
curl http://localhost:11434/api/copy -d '{
- "source": "llama2",
- "destination": "llama2-backup"
+ "source": "llama3",
+ "destination": "llama3-backup"
}'
```
@@ -854,7 +854,7 @@ Delete a model and its data.
```shell
curl -X DELETE http://localhost:11434/api/delete -d '{
- "name": "llama2:13b"
+ "name": "llama3:13b"
}'
```
@@ -882,7 +882,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
```shell
curl http://localhost:11434/api/pull -d '{
- "name": "llama2"
+ "name": "llama3"
}'
```
diff --git a/docs/faq.md b/docs/faq.md
index 7ade43b7..109a1144 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter:
```
curl http://localhost:11434/api/generate -d '{
- "model": "llama2",
+ "model": "llama3",
"prompt": "Why is the sky blue?",
"options": {
"num_ctx": 4096
@@ -88,9 +88,9 @@ On windows, Ollama inherits your user and system environment variables.
3. Edit or create New variable(s) for your user account for `OLLAMA_HOST`, `OLLAMA_MODELS`, etc.
-4. Click OK/Apply to save
+4. Click OK/Apply to save
-5. Run `ollama` from a new terminal window
+5. Run `ollama` from a new terminal window
## How can I expose Ollama on my network?
@@ -221,14 +221,20 @@ The `keep_alive` parameter can be set to:
For example, to preload a model and leave it in memory use:
```shell
-curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": -1}'
+curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": -1}'
```
To unload the model and free up memory use:
```shell
-curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": 0}'
+curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": 0}'
```
Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable.
If you wish to override the `OLLAMA_KEEP_ALIVE` setting, use the `keep_alive` API parameter with the `/api/generate` or `/api/chat` API endpoints.
+
+## How do I manage the maximum number of requests the server can queue?
+
+If too many requests are sent to the server, it will respond with a 503 error
+indicating that the server is overloaded. You can adjust how many requests may
+be queued by setting `OLLAMA_MAX_QUEUE`.
\ No newline at end of file
diff --git a/docs/modelfile.md b/docs/modelfile.md
index 24002bde..21ee1826 100644
--- a/docs/modelfile.md
+++ b/docs/modelfile.md
@@ -10,7 +10,7 @@ A model file is the blueprint to create and share models with Ollama.
- [Examples](#examples)
- [Instructions](#instructions)
- [FROM (Required)](#from-required)
- - [Build from llama2](#build-from-llama2)
+ - [Build from llama3](#build-from-llama3)
- [Build from a bin file](#build-from-a-bin-file)
- [PARAMETER](#parameter)
- [Valid Parameters and Values](#valid-parameters-and-values)
@@ -48,7 +48,7 @@ INSTRUCTION arguments
An example of a `Modelfile` creating a mario blueprint:
```modelfile
-FROM llama2
+FROM llama3
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
@@ -67,33 +67,25 @@ To use this:
More examples are available in the [examples directory](../examples).
-### `Modelfile`s in [ollama.com/library][1]
-
-There are two ways to view `Modelfile`s underlying the models in [ollama.com/library][1]:
-
-- Option 1: view a details page from a model's tags page:
- 1. Go to a particular model's tags (e.g. https://ollama.com/library/llama2/tags)
- 2. Click on a tag (e.g. https://ollama.com/library/llama2:13b)
- 3. Scroll down to "Layers"
- - Note: if the [`FROM` instruction](#from-required) is not present,
- it means the model was created from a local file
-- Option 2: use `ollama show` to print the `Modelfile` for any local models like so:
+To view the Modelfile of a given model, use the `ollama show --modelfile` command.
```bash
- > ollama show --modelfile llama2:13b
+ > ollama show --modelfile llama3
# Modelfile generated by "ollama show"
# To build a new Modelfile based on this one, replace the FROM line with:
- # FROM llama2:13b
+ # FROM llama3:latest
+ FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
+ TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
- FROM /root/.ollama/models/blobs/sha256:123abc
- TEMPLATE """[INST] {{ if .System }}<>{{ .System }}<>
+ {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
- {{ end }}{{ .Prompt }} [/INST] """
- SYSTEM """"""
- PARAMETER stop [INST]
- PARAMETER stop [/INST]
- PARAMETER stop <>
- PARAMETER stop <>
+ {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+
+ {{ .Response }}<|eot_id|>"""
+ PARAMETER stop "<|start_header_id|>"
+ PARAMETER stop "<|end_header_id|>"
+ PARAMETER stop "<|eot_id|>"
+ PARAMETER stop "<|reserved_special_token"
```
## Instructions
@@ -106,10 +98,10 @@ The `FROM` instruction defines the base model to use when creating a model.
FROM :
```
-#### Build from llama2
+#### Build from llama3
```modelfile
-FROM llama2
+FROM llama3
```
A list of available base models:
diff --git a/docs/openai.md b/docs/openai.md
index b4dc1f21..557b5846 100644
--- a/docs/openai.md
+++ b/docs/openai.md
@@ -25,7 +25,7 @@ chat_completion = client.chat.completions.create(
'content': 'Say this is a test',
}
],
- model='llama2',
+ model='llama3',
)
```
@@ -43,7 +43,7 @@ const openai = new OpenAI({
const chatCompletion = await openai.chat.completions.create({
messages: [{ role: 'user', content: 'Say this is a test' }],
- model: 'llama2',
+ model: 'llama3',
})
```
@@ -53,7 +53,7 @@ const chatCompletion = await openai.chat.completions.create({
curl http://localhost:11434/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
- "model": "llama2",
+ "model": "llama3",
"messages": [
{
"role": "system",
@@ -113,7 +113,7 @@ curl http://localhost:11434/v1/chat/completions \
Before using a model, pull it locally `ollama pull`:
```shell
-ollama pull llama2
+ollama pull llama3
```
### Default model names
@@ -121,7 +121,7 @@ ollama pull llama2
For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name:
```
-ollama cp llama2 gpt-3.5-turbo
+ollama cp llama3 gpt-3.5-turbo
```
Afterwards, this new model name can be specified the `model` field:
diff --git a/docs/tutorials/langchainjs.md b/docs/tutorials/langchainjs.md
index 7cd4012f..63b34aa6 100644
--- a/docs/tutorials/langchainjs.md
+++ b/docs/tutorials/langchainjs.md
@@ -15,7 +15,7 @@ import { Ollama } from "langchain/llms/ollama";
const ollama = new Ollama({
baseUrl: "http://localhost:11434",
- model: "llama2",
+ model: "llama3",
});
const answer = await ollama.invoke(`why is the sky blue?`);
@@ -23,10 +23,10 @@ const answer = await ollama.invoke(`why is the sky blue?`);
console.log(answer);
```
-That will get us the same thing as if we ran `ollama run llama2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
+That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
```bash
-npm install cheerio
+npm install cheerio
```
```javascript
diff --git a/docs/windows.md b/docs/windows.md
index 906dbfcc..72c5d32c 100644
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -27,7 +27,7 @@ Logs will often be helpful in diagnosing the problem (see
Here's a quick example showing API access from `powershell`
```powershell
-(Invoke-WebRequest -method POST -Body '{"model":"llama2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
+(Invoke-WebRequest -method POST -Body '{"model":"llama3", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
```
## Troubleshooting
@@ -45,3 +45,17 @@ the explorer window by hitting `+R` and type in:
- `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
- `explorer %HOMEPATH%\.ollama` contains models and configuration
- `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories
+
+
+## Standalone CLI
+
+The easiest way to install Ollama on Windows is to use the `OllamaSetup.exe`
+installer. It installs in your account without requiring Administrator rights.
+We update Ollama regularly to support the latest models, and this installer will
+help you keep up to date.
+
+If you'd like to install or integrate Ollama as a service, a standalone
+`ollama-windows-amd64.zip` zip file is available containing only the Ollama CLI
+and GPU library dependencies for Nvidia and AMD. This allows for embedding
+Ollama in existing applications, or running it as a system service via `ollama
+serve` with tools such as [NSSM](https://nssm.cc/).
\ No newline at end of file
diff --git a/examples/bash-comparemodels/README.md b/examples/bash-comparemodels/README.md
index 91499255..65e66f1e 100644
--- a/examples/bash-comparemodels/README.md
+++ b/examples/bash-comparemodels/README.md
@@ -2,7 +2,7 @@
When calling `ollama`, you can pass it a file to run all the prompts in the file, one after the other:
-`ollama run llama2 < sourcequestions.txt`
+`ollama run llama3 < sourcequestions.txt`
This concept is used in the following example.
diff --git a/examples/go-chat/main.go b/examples/go-chat/main.go
index 83aaad3d..5266f03e 100644
--- a/examples/go-chat/main.go
+++ b/examples/go-chat/main.go
@@ -35,7 +35,7 @@ func main() {
ctx := context.Background()
req := &api.ChatRequest{
- Model: "llama2",
+ Model: "llama3",
Messages: messages,
}
diff --git a/examples/go-http-generate/main.go b/examples/go-http-generate/main.go
index f4ca32f4..e5b64348 100644
--- a/examples/go-http-generate/main.go
+++ b/examples/go-http-generate/main.go
@@ -19,7 +19,7 @@ func main() {
}
defer resp.Body.Close()
-
+
responseData, err := io.ReadAll(resp.Body)
if err != nil {
log.Fatal(err)
diff --git a/examples/langchain-python-rag-document/main.py b/examples/langchain-python-rag-document/main.py
index b9f98c4e..3ed9499f 100644
--- a/examples/langchain-python-rag-document/main.py
+++ b/examples/langchain-python-rag-document/main.py
@@ -40,9 +40,9 @@ while True:
continue
# Prompt
- template = """Use the following pieces of context to answer the question at the end.
- If you don't know the answer, just say that you don't know, don't try to make up an answer.
- Use three sentences maximum and keep the answer as concise as possible.
+ template = """Use the following pieces of context to answer the question at the end.
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
+ Use three sentences maximum and keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""
@@ -51,11 +51,11 @@ while True:
template=template,
)
- llm = Ollama(model="llama2:13b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+ llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
qa_chain = RetrievalQA.from_chain_type(
llm,
retriever=vectorstore.as_retriever(),
chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
- result = qa_chain({"query": query})
\ No newline at end of file
+ result = qa_chain({"query": query})
diff --git a/examples/langchain-python-rag-websummary/main.py b/examples/langchain-python-rag-websummary/main.py
index cd2ef47f..d1b05ba8 100644
--- a/examples/langchain-python-rag-websummary/main.py
+++ b/examples/langchain-python-rag-websummary/main.py
@@ -1,12 +1,12 @@
-from langchain.llms import Ollama
-from langchain.document_loaders import WebBaseLoader
+from langchain_community.llms import Ollama
+from langchain_community.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
docs = loader.load()
-llm = Ollama(model="llama2")
+llm = Ollama(model="llama3")
chain = load_summarize_chain(llm, chain_type="stuff")
-result = chain.run(docs)
+result = chain.invoke(docs)
print(result)
diff --git a/examples/langchain-python-simple/README.md b/examples/langchain-python-simple/README.md
index 3f401ca8..d4102dec 100644
--- a/examples/langchain-python-simple/README.md
+++ b/examples/langchain-python-simple/README.md
@@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.
## Running the Example
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:
```bash
- ollama pull llama2
+ ollama pull llama3
```
2. Install the Python Requirements.
@@ -21,4 +21,3 @@ This example is a basic "hello world" of using LangChain with Ollama.
```bash
python main.py
```
-
\ No newline at end of file
diff --git a/examples/langchain-python-simple/main.py b/examples/langchain-python-simple/main.py
index da696e00..7cb65286 100644
--- a/examples/langchain-python-simple/main.py
+++ b/examples/langchain-python-simple/main.py
@@ -1,6 +1,6 @@
from langchain.llms import Ollama
input = input("What is your question?")
-llm = Ollama(model="llama2")
+llm = Ollama(model="llama3")
res = llm.predict(input)
print (res)
diff --git a/examples/modelfile-mario/Modelfile b/examples/modelfile-mario/Modelfile
index 35c787fc..33d5952b 100644
--- a/examples/modelfile-mario/Modelfile
+++ b/examples/modelfile-mario/Modelfile
@@ -1,4 +1,4 @@
-FROM llama2
+FROM llama3
PARAMETER temperature 1
SYSTEM """
You are Mario from super mario bros, acting as an assistant.
diff --git a/examples/modelfile-mario/readme.md b/examples/modelfile-mario/readme.md
index 0d72dddc..e4f0d417 100644
--- a/examples/modelfile-mario/readme.md
+++ b/examples/modelfile-mario/readme.md
@@ -2,12 +2,12 @@
# Example character: Mario
-This example shows how to create a basic character using Llama2 as the base model.
+This example shows how to create a basic character using Llama3 as the base model.
To run this example:
1. Download the Modelfile
-2. `ollama pull llama2` to get the base model used in the model file.
+2. `ollama pull llama3` to get the base model used in the model file.
3. `ollama create NAME -f ./Modelfile`
4. `ollama run NAME`
@@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?"
What the model file looks like:
```
-FROM llama2
+FROM llama3
PARAMETER temperature 1
SYSTEM """
You are Mario from Super Mario Bros, acting as an assistant.
diff --git a/examples/python-json-datagenerator/predefinedschema.py b/examples/python-json-datagenerator/predefinedschema.py
index abc399c4..1fd54892 100644
--- a/examples/python-json-datagenerator/predefinedschema.py
+++ b/examples/python-json-datagenerator/predefinedschema.py
@@ -2,16 +2,16 @@ import requests
import json
import random
-model = "llama2"
+model = "llama3"
template = {
- "firstName": "",
- "lastName": "",
+ "firstName": "",
+ "lastName": "",
"address": {
- "street": "",
- "city": "",
- "state": "",
+ "street": "",
+ "city": "",
+ "state": "",
"zipCode": ""
- },
+ },
"phoneNumber": ""
}
diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py
index 5f27448f..72b1fefb 100644
--- a/examples/python-json-datagenerator/randomaddresses.py
+++ b/examples/python-json-datagenerator/randomaddresses.py
@@ -12,7 +12,7 @@ countries = [
"France",
]
country = random.choice(countries)
-model = "llama2"
+model = "llama3"
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md
index 369fb2a5..88357044 100644
--- a/examples/python-json-datagenerator/readme.md
+++ b/examples/python-json-datagenerator/readme.md
@@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran
## Running the Example
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:
```bash
- ollama pull llama2
+ ollama pull llama3
```
2. Install the Python Requirements.
diff --git a/examples/python-simplechat/client.py b/examples/python-simplechat/client.py
index 768a2289..9ae99fb7 100644
--- a/examples/python-simplechat/client.py
+++ b/examples/python-simplechat/client.py
@@ -2,7 +2,7 @@ import json
import requests
# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
-model = "llama2" # TODO: update this for whatever model you wish to use
+model = "llama3" # TODO: update this for whatever model you wish to use
def chat(messages):
diff --git a/examples/python-simplechat/readme.md b/examples/python-simplechat/readme.md
index 204a8159..dd2576bc 100644
--- a/examples/python-simplechat/readme.md
+++ b/examples/python-simplechat/readme.md
@@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam
## Running the Example
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:
```bash
- ollama pull llama2
+ ollama pull llama3
```
2. Install the Python Requirements.
diff --git a/examples/typescript-mentors/README.md b/examples/typescript-mentors/README.md
index c3ce9c82..d3611a5e 100644
--- a/examples/typescript-mentors/README.md
+++ b/examples/typescript-mentors/README.md
@@ -4,10 +4,10 @@ This example demonstrates how one would create a set of 'mentors' you can have a
## Usage
-1. Add llama2 to have the mentors ask your questions:
+1. Add llama3 so the mentors can answer your questions:
```bash
- ollama pull llama2
+ ollama pull llama3
```
2. Install prerequisites:
diff --git a/examples/typescript-mentors/character-generator.ts b/examples/typescript-mentors/character-generator.ts
index 886eec67..dc5d2f5e 100644
--- a/examples/typescript-mentors/character-generator.ts
+++ b/examples/typescript-mentors/character-generator.ts
@@ -15,7 +15,7 @@ async function characterGenerator() {
ollama.setModel("stablebeluga2:70b-q4_K_M");
const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. `);
- const thecontents = `FROM llama2\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;
+ const thecontents = `FROM llama3\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;
fs.writeFile(path.join(directory, 'Modelfile'), thecontents, (err: any) => {
if (err) throw err;
@@ -23,4 +23,4 @@ async function characterGenerator() {
});
}
-characterGenerator();
\ No newline at end of file
+characterGenerator();
diff --git a/examples/typescript-simplechat/client.ts b/examples/typescript-simplechat/client.ts
index 3e571ab6..a1e0eea3 100644
--- a/examples/typescript-simplechat/client.ts
+++ b/examples/typescript-simplechat/client.ts
@@ -1,6 +1,6 @@
import * as readline from "readline";
-const model = "llama2";
+const model = "llama3";
type Message = {
role: "assistant" | "user" | "system";
content: string;
@@ -74,4 +74,4 @@ async function main() {
}
-main();
\ No newline at end of file
+main();
diff --git a/gpu/assets.go b/gpu/assets.go
index f9b018cd..911a6977 100644
--- a/gpu/assets.go
+++ b/gpu/assets.go
@@ -12,6 +12,8 @@ import (
"sync"
"syscall"
"time"
+
+ "github.com/ollama/ollama/server/envconfig"
)
var (
@@ -24,45 +26,8 @@ func PayloadsDir() (string, error) {
defer lock.Unlock()
var err error
if payloadsDir == "" {
- runnersDir := os.Getenv("OLLAMA_RUNNERS_DIR")
- // On Windows we do not carry the payloads inside the main executable
- if runtime.GOOS == "windows" && runnersDir == "" {
- appExe, err := os.Executable()
- if err != nil {
- slog.Error("failed to lookup executable path", "error", err)
- return "", err
- }
+ runnersDir := envconfig.RunnersDir
- cwd, err := os.Getwd()
- if err != nil {
- slog.Error("failed to lookup working directory", "error", err)
- return "", err
- }
-
- var paths []string
- for _, root := range []string{filepath.Dir(appExe), cwd} {
- paths = append(paths,
- filepath.Join(root),
- filepath.Join(root, "windows-"+runtime.GOARCH),
- filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
- )
- }
-
- // Try a few variations to improve developer experience when building from source in the local tree
- for _, p := range paths {
- candidate := filepath.Join(p, "ollama_runners")
- _, err := os.Stat(candidate)
- if err == nil {
- runnersDir = candidate
- break
- }
- }
- if runnersDir == "" {
- err = fmt.Errorf("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
- slog.Error("incomplete distribution", "error", err)
- return "", err
- }
- }
if runnersDir != "" {
payloadsDir = runnersDir
return payloadsDir, nil
@@ -70,7 +35,7 @@ func PayloadsDir() (string, error) {
// The remainder only applies on non-windows where we still carry payloads in the main executable
cleanupTmpDirs()
- tmpDir := os.Getenv("OLLAMA_TMPDIR")
+ tmpDir := envconfig.TmpDir
if tmpDir == "" {
tmpDir, err = os.MkdirTemp("", "ollama")
if err != nil {
@@ -133,7 +98,7 @@ func cleanupTmpDirs() {
func Cleanup() {
lock.Lock()
defer lock.Unlock()
- runnersDir := os.Getenv("OLLAMA_RUNNERS_DIR")
+ runnersDir := envconfig.RunnersDir
if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
// We want to fully clean up the tmpdir parent of the payloads dir
tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
diff --git a/gpu/gpu.go b/gpu/gpu.go
index 9b915015..a056a90b 100644
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -21,6 +21,7 @@ import (
"unsafe"
"github.com/ollama/ollama/format"
+ "github.com/ollama/ollama/server/envconfig"
)
type handles struct {
@@ -268,7 +269,7 @@ func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
}
func getVerboseState() C.uint16_t {
- if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
+ if envconfig.Debug {
return C.uint16_t(1)
}
return C.uint16_t(0)
diff --git a/integration/max_queue_test.go b/integration/max_queue_test.go
new file mode 100644
index 00000000..43b15c6c
--- /dev/null
+++ b/integration/max_queue_test.go
@@ -0,0 +1,117 @@
+//go:build integration
+
+package integration
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "log/slog"
+ "os"
+ "strconv"
+ "strings"
+ "sync"
+ "testing"
+ "time"
+
+ "github.com/ollama/ollama/api"
+ "github.com/stretchr/testify/require"
+)
+
+func TestMaxQueue(t *testing.T) {
+	// Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless you're on GPU
+ // Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
+ threadCount := 32
+ mq := os.Getenv("OLLAMA_MAX_QUEUE")
+ if mq != "" {
+ var err error
+ threadCount, err = strconv.Atoi(mq)
+ require.NoError(t, err)
+ } else {
+ os.Setenv("OLLAMA_MAX_QUEUE", fmt.Sprintf("%d", threadCount))
+ }
+
+ req := api.GenerateRequest{
+ Model: "orca-mini",
+ Prompt: "write a long historical fiction story about christopher columbus. use at least 10 facts from his actual journey",
+ Options: map[string]interface{}{
+ "seed": 42,
+ "temperature": 0.0,
+ },
+ }
+ resp := []string{"explore", "discover", "ocean"}
+
+ // CPU mode takes much longer at the limit with a large queue setting
+ ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+ defer cancel()
+ client, _, cleanup := InitServerConnection(ctx, t)
+ defer cleanup()
+
+ require.NoError(t, PullIfMissing(ctx, client, req.Model))
+
+ // Context for the worker threads so we can shut them down
+ // embedCtx, embedCancel := context.WithCancel(ctx)
+ embedCtx := ctx
+
+	var genwg sync.WaitGroup
+	genwg.Add(1) // Add before starting the goroutine so Wait can't return early
+	go func() {
+		defer genwg.Done()
+ slog.Info("Starting generate request")
+ DoGenerate(ctx, t, client, req, resp, 45*time.Second, 5*time.Second)
+ slog.Info("generate completed")
+ }()
+
+ // Give the generate a chance to get started before we start hammering on embed requests
+ time.Sleep(5 * time.Millisecond)
+
+ threadCount += 10 // Add a few extra to ensure we push the queue past its limit
+ busyCount := 0
+ resetByPeerCount := 0
+ canceledCount := 0
+	successCount := 0
+ counterMu := sync.Mutex{}
+ var embedwg sync.WaitGroup
+	for i := 0; i < threadCount; i++ {
+		embedwg.Add(1) // Add before starting the goroutine so Wait can't return early
+		go func(i int) {
+			defer embedwg.Done()
+ slog.Info("embed started", "id", i)
+ embedReq := api.EmbeddingRequest{
+ Model: req.Model,
+ Prompt: req.Prompt,
+ Options: req.Options,
+ }
+ // Fresh client for every request
+ client, _ = GetTestEndpoint()
+
+ resp, genErr := client.Embeddings(embedCtx, &embedReq)
+ counterMu.Lock()
+ defer counterMu.Unlock()
+ switch {
+ case genErr == nil:
+			successCount++
+ require.Greater(t, len(resp.Embedding), 5) // somewhat arbitrary, but sufficient to be reasonable
+ case errors.Is(genErr, context.Canceled):
+ canceledCount++
+ case strings.Contains(genErr.Error(), "busy"):
+ busyCount++
+ case strings.Contains(genErr.Error(), "connection reset by peer"):
+ resetByPeerCount++
+ default:
+ require.NoError(t, genErr, "%d request failed", i)
+ }
+
+ slog.Info("embed finished", "id", i)
+ }(i)
+ }
+ genwg.Wait()
+ slog.Info("generate done, waiting for embeds")
+ embedwg.Wait()
+
+ require.Equal(t, resetByPeerCount, 0, "Connections reset by peer, have you updated your fd and socket limits?")
+ require.True(t, busyCount > 0, "no requests hit busy error but some should have")
+ require.True(t, canceledCount == 0, "no requests should have been canceled due to timeout")
+
+	slog.Info("embeds completed", "success", successCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
+}
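
The test above identifies queue overflow by the "busy" substring in the returned error. As a hedged illustration only, a client could retry on that same signal; the helper below is a sketch, assuming `api.ClientFromEnvironment` from the Go examples in this repo, with a hypothetical helper name and an arbitrary linear backoff.

```go
package main

import (
	"context"
	"fmt"
	"log"
	"strings"
	"time"

	"github.com/ollama/ollama/api"
)

// embedWithRetry retries an embedding request while the server reports it is
// busy (queue full), using the same "busy" substring check as the test above.
func embedWithRetry(ctx context.Context, client *api.Client, req *api.EmbeddingRequest, attempts int) (*api.EmbeddingResponse, error) {
	var lastErr error
	for i := 0; i < attempts; i++ {
		resp, err := client.Embeddings(ctx, req)
		if err == nil {
			return resp, nil
		}
		if !strings.Contains(err.Error(), "busy") {
			return nil, err // not an overload error, fail fast
		}
		lastErr = err
		time.Sleep(time.Duration(500*(i+1)) * time.Millisecond) // simple linear backoff
	}
	return nil, lastErr
}

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}
	resp, err := embedWithRetry(context.Background(), client, &api.EmbeddingRequest{
		Model:  "orca-mini",
		Prompt: "why is the sky blue?",
	}, 5)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(len(resp.Embedding), "dimensions")
}
```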
diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp
index 3448bcc5..df28c412 100644
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -1186,8 +1186,6 @@ struct llama_server_context
{"model", params.model_alias},
{"tokens_predicted", slot.n_decoded},
{"tokens_evaluated", slot.n_prompt_tokens},
- {"generation_settings", get_formated_generation(slot)},
- {"prompt", slot.prompt},
{"truncated", slot.truncated},
{"stopped_eos", slot.stopped_eos},
{"stopped_word", slot.stopped_word},
diff --git a/llm/memory.go b/llm/memory.go
index b705aefe..661a0c50 100644
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -3,12 +3,11 @@ package llm
import (
"fmt"
"log/slog"
- "os"
- "strconv"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/gpu"
+ "github.com/ollama/ollama/server/envconfig"
)
// This algorithm looks for a complete fit to determine if we need to unload other models
@@ -50,15 +49,8 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
for _, info := range gpus {
memoryAvailable += info.FreeMemory
}
- userLimit := os.Getenv("OLLAMA_MAX_VRAM")
- if userLimit != "" {
- avail, err := strconv.ParseUint(userLimit, 10, 64)
- if err != nil {
- slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
- } else {
- slog.Info("user override memory limit", "OLLAMA_MAX_VRAM", avail, "actual", memoryAvailable)
- memoryAvailable = avail
- }
+ if envconfig.MaxVRAM > 0 {
+ memoryAvailable = envconfig.MaxVRAM
}
slog.Debug("evaluating", "library", gpus[0].Library, "gpu_count", len(gpus), "available", format.HumanBytes2(memoryAvailable))
diff --git a/llm/patches/05-clip-fix.diff b/llm/patches/05-clip-fix.diff
new file mode 100644
index 00000000..3f68a5bb
--- /dev/null
+++ b/llm/patches/05-clip-fix.diff
@@ -0,0 +1,24 @@
+diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
+index e3c9bcd4..b43f892d 100644
+--- a/examples/llava/clip.cpp
++++ b/examples/llava/clip.cpp
+@@ -573,14 +573,16 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
+ struct ggml_tensor * embeddings = inp;
+ if (ctx->has_class_embedding) {
+ embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size);
++ }
++ ggml_set_name(embeddings, "embeddings");
++ ggml_set_input(embeddings);
++
++ if (ctx->has_class_embedding) {
+ embeddings = ggml_acc(ctx0, embeddings, model.class_embedding,
+ embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0);
+ embeddings = ggml_acc(ctx0, embeddings, inp,
+ embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]);
+ }
+- ggml_set_name(embeddings, "embeddings");
+- ggml_set_input(embeddings);
+-
+
+ struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_positions);
+ ggml_set_name(positions, "positions");
diff --git a/llm/server.go b/llm/server.go
index b41f393d..44bada08 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -26,6 +26,7 @@ import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/gpu"
+ "github.com/ollama/ollama/server/envconfig"
)
type LlamaServer interface {
@@ -124,7 +125,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
} else {
servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
}
- demandLib := strings.Trim(os.Getenv("OLLAMA_LLM_LIBRARY"), "\"' ")
+ demandLib := envconfig.LLMLibrary
if demandLib != "" {
serverPath := availableServers[demandLib]
if serverPath == "" {
@@ -145,7 +146,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--embedding",
}
- if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
+ if envconfig.Debug {
params = append(params, "--log-format", "json")
} else {
params = append(params, "--log-disable")
@@ -155,7 +156,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU))
}
- if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
+ if envconfig.Debug {
params = append(params, "--verbose")
}
@@ -193,16 +194,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--numa")
}
- // "--cont-batching", // TODO - doesn't seem to have any noticeable perf change for multiple requests
- numParallel := 1
- if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
- numParallel, err = strconv.Atoi(onp)
- if err != nil || numParallel <= 0 {
- err = fmt.Errorf("invalid OLLAMA_NUM_PARALLEL=%s must be greater than zero - %w", onp, err)
- slog.Error("misconfiguration", "error", err)
- return nil, err
- }
+ numParallel := envconfig.NumParallel
+
+ // TODO (jmorganca): multimodal models don't support parallel yet
+ // see https://github.com/ollama/ollama/issues/4165
+ if len(projectors) > 0 {
+ numParallel = 1
+ slog.Warn("multimodal models don't support parallel requests yet")
}
+
params = append(params, "--parallel", fmt.Sprintf("%d", numParallel))
for i := 0; i < len(servers); i++ {
diff --git a/server/envconfig/config.go b/server/envconfig/config.go
new file mode 100644
index 00000000..9ad68180
--- /dev/null
+++ b/server/envconfig/config.go
@@ -0,0 +1,174 @@
+package envconfig
+
+import (
+ "fmt"
+ "log/slog"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strconv"
+ "strings"
+)
+
+var (
+ // Set via OLLAMA_ORIGINS in the environment
+ AllowOrigins []string
+ // Set via OLLAMA_DEBUG in the environment
+ Debug bool
+ // Set via OLLAMA_LLM_LIBRARY in the environment
+ LLMLibrary string
+ // Set via OLLAMA_MAX_LOADED_MODELS in the environment
+ MaxRunners int
+ // Set via OLLAMA_MAX_QUEUE in the environment
+ MaxQueuedRequests int
+ // Set via OLLAMA_MAX_VRAM in the environment
+ MaxVRAM uint64
+ // Set via OLLAMA_NOPRUNE in the environment
+ NoPrune bool
+ // Set via OLLAMA_NUM_PARALLEL in the environment
+ NumParallel int
+ // Set via OLLAMA_RUNNERS_DIR in the environment
+ RunnersDir string
+ // Set via OLLAMA_TMPDIR in the environment
+ TmpDir string
+)
+
+func AsMap() map[string]string {
+ return map[string]string{
+ "OLLAMA_ORIGINS": fmt.Sprintf("%v", AllowOrigins),
+ "OLLAMA_DEBUG": fmt.Sprintf("%v", Debug),
+ "OLLAMA_LLM_LIBRARY": fmt.Sprintf("%v", LLMLibrary),
+ "OLLAMA_MAX_LOADED_MODELS": fmt.Sprintf("%v", MaxRunners),
+ "OLLAMA_MAX_QUEUE": fmt.Sprintf("%v", MaxQueuedRequests),
+ "OLLAMA_MAX_VRAM": fmt.Sprintf("%v", MaxVRAM),
+ "OLLAMA_NOPRUNE": fmt.Sprintf("%v", NoPrune),
+ "OLLAMA_NUM_PARALLEL": fmt.Sprintf("%v", NumParallel),
+ "OLLAMA_RUNNERS_DIR": fmt.Sprintf("%v", RunnersDir),
+ "OLLAMA_TMPDIR": fmt.Sprintf("%v", TmpDir),
+ }
+}
+
+var defaultAllowOrigins = []string{
+ "localhost",
+ "127.0.0.1",
+ "0.0.0.0",
+}
+
+// Clean quotes and spaces from the value
+func clean(key string) string {
+ return strings.Trim(os.Getenv(key), "\"' ")
+}
+
+func init() {
+ // default values
+ NumParallel = 1
+ MaxRunners = 1
+ MaxQueuedRequests = 512
+
+ LoadConfig()
+}
+
+func LoadConfig() {
+ if debug := clean("OLLAMA_DEBUG"); debug != "" {
+ d, err := strconv.ParseBool(debug)
+ if err == nil {
+ Debug = d
+ } else {
+ Debug = true
+ }
+ }
+
+ RunnersDir = clean("OLLAMA_RUNNERS_DIR")
+ if runtime.GOOS == "windows" && RunnersDir == "" {
+ // On Windows we do not carry the payloads inside the main executable
+ appExe, err := os.Executable()
+ if err != nil {
+ slog.Error("failed to lookup executable path", "error", err)
+ }
+
+ cwd, err := os.Getwd()
+ if err != nil {
+ slog.Error("failed to lookup working directory", "error", err)
+ }
+
+ var paths []string
+ for _, root := range []string{filepath.Dir(appExe), cwd} {
+ paths = append(paths,
+ filepath.Join(root),
+ filepath.Join(root, "windows-"+runtime.GOARCH),
+ filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
+ )
+ }
+
+ // Try a few variations to improve developer experience when building from source in the local tree
+ for _, p := range paths {
+ candidate := filepath.Join(p, "ollama_runners")
+ _, err := os.Stat(candidate)
+ if err == nil {
+ RunnersDir = candidate
+ break
+ }
+ }
+ if RunnersDir == "" {
+ slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
+ }
+ }
+
+ TmpDir = clean("OLLAMA_TMPDIR")
+
+ userLimit := clean("OLLAMA_MAX_VRAM")
+ if userLimit != "" {
+ avail, err := strconv.ParseUint(userLimit, 10, 64)
+ if err != nil {
+ slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
+ } else {
+ MaxVRAM = avail
+ }
+ }
+
+ LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
+
+ if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
+ val, err := strconv.Atoi(onp)
+ if err != nil || val <= 0 {
+ slog.Error("invalid setting must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
+ } else {
+ NumParallel = val
+ }
+ }
+
+ if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
+ NoPrune = true
+ }
+
+ if origins := clean("OLLAMA_ORIGINS"); origins != "" {
+ AllowOrigins = strings.Split(origins, ",")
+ }
+ for _, allowOrigin := range defaultAllowOrigins {
+ AllowOrigins = append(AllowOrigins,
+ fmt.Sprintf("http://%s", allowOrigin),
+ fmt.Sprintf("https://%s", allowOrigin),
+ fmt.Sprintf("http://%s:*", allowOrigin),
+ fmt.Sprintf("https://%s:*", allowOrigin),
+ )
+ }
+
+ maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
+ if maxRunners != "" {
+ m, err := strconv.Atoi(maxRunners)
+ if err != nil {
+ slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
+ } else {
+ MaxRunners = m
+ }
+ }
+
+ if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" {
+ p, err := strconv.Atoi(onp)
+ if err != nil || p <= 0 {
+ slog.Error("invalid setting", "OLLAMA_MAX_QUEUE", onp, "error", err)
+ } else {
+ MaxQueuedRequests = p
+ }
+ }
+}
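
A minimal sketch of how callers consume this new package, using only the exported variables and the `AsMap` helper defined above:

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/server/envconfig"
)

func main() {
	// init() runs LoadConfig, so importing the package is enough; callers read
	// plain variables instead of parsing environment strings themselves.
	if envconfig.Debug {
		fmt.Println("debug logging enabled via OLLAMA_DEBUG")
	}

	// AsMap gives the effective settings, e.g. for logging at startup.
	for k, v := range envconfig.AsMap() {
		fmt.Printf("%s=%s\n", k, v)
	}
}
```

This is the pattern the call sites in this diff follow: `gpu/assets.go`, `llm/server.go`, and `server/images.go` simply read `envconfig.RunnersDir`, `envconfig.Debug`, `envconfig.NoPrune`, and friends.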
diff --git a/server/envconfig/config_test.go b/server/envconfig/config_test.go
new file mode 100644
index 00000000..b2760299
--- /dev/null
+++ b/server/envconfig/config_test.go
@@ -0,0 +1,20 @@
+package envconfig
+
+import (
+ "os"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestConfig(t *testing.T) {
+ os.Setenv("OLLAMA_DEBUG", "")
+ LoadConfig()
+ require.False(t, Debug)
+ os.Setenv("OLLAMA_DEBUG", "false")
+ LoadConfig()
+ require.False(t, Debug)
+ os.Setenv("OLLAMA_DEBUG", "1")
+ LoadConfig()
+ require.True(t, Debug)
+}
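
A possible companion test for the queue limit, shown only as a sketch; it assumes it sits in the same package as the test above and relies on the `OLLAMA_MAX_QUEUE` parsing and the 512 default set in `init()`.

```go
package envconfig

import (
	"os"
	"testing"

	"github.com/stretchr/testify/require"
)

func TestMaxQueueConfig(t *testing.T) {
	os.Setenv("OLLAMA_MAX_QUEUE", "")
	MaxQueuedRequests = 512 // reset to the documented default before loading
	LoadConfig()
	require.Equal(t, 512, MaxQueuedRequests)

	os.Setenv("OLLAMA_MAX_QUEUE", "10")
	LoadConfig()
	require.Equal(t, 10, MaxQueuedRequests)

	// invalid values are logged and ignored, so the previous value is kept
	os.Setenv("OLLAMA_MAX_QUEUE", "not-a-number")
	LoadConfig()
	require.Equal(t, 10, MaxQueuedRequests)
}
```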
diff --git a/server/images.go b/server/images.go
index 68840c1a..76205392 100644
--- a/server/images.go
+++ b/server/images.go
@@ -29,7 +29,7 @@ import (
"github.com/ollama/ollama/convert"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/llm"
- "github.com/ollama/ollama/parser"
+ "github.com/ollama/ollama/server/envconfig"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
@@ -63,46 +63,74 @@ func (m *Model) IsEmbedding() bool {
return slices.Contains(m.Config.ModelFamilies, "bert") || slices.Contains(m.Config.ModelFamilies, "nomic-bert")
}
-func (m *Model) Commands() (cmds []parser.Command) {
- cmds = append(cmds, parser.Command{Name: "model", Args: m.ModelPath})
+func (m *Model) String() string {
+ var modelfile model.File
+
+ modelfile.Commands = append(modelfile.Commands, model.Command{
+ Name: "model",
+ Args: m.ModelPath,
+ })
if m.Template != "" {
- cmds = append(cmds, parser.Command{Name: "template", Args: m.Template})
+ modelfile.Commands = append(modelfile.Commands, model.Command{
+ Name: "template",
+ Args: m.Template,
+ })
}
if m.System != "" {
- cmds = append(cmds, parser.Command{Name: "system", Args: m.System})
+ modelfile.Commands = append(modelfile.Commands, model.Command{
+ Name: "system",
+ Args: m.System,
+ })
}
for _, adapter := range m.AdapterPaths {
- cmds = append(cmds, parser.Command{Name: "adapter", Args: adapter})
+ modelfile.Commands = append(modelfile.Commands, model.Command{
+ Name: "adapter",
+ Args: adapter,
+ })
}
for _, projector := range m.ProjectorPaths {
- cmds = append(cmds, parser.Command{Name: "projector", Args: projector})
+ modelfile.Commands = append(modelfile.Commands, model.Command{
+ Name: "projector",
+ Args: projector,
+ })
}
for k, v := range m.Options {
switch v := v.(type) {
case []any:
for _, s := range v {
- cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", s)})
+ modelfile.Commands = append(modelfile.Commands, model.Command{
+ Name: k,
+ Args: fmt.Sprintf("%v", s),
+ })
}
default:
- cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", v)})
+ modelfile.Commands = append(modelfile.Commands, model.Command{
+ Name: k,
+ Args: fmt.Sprintf("%v", v),
+ })
}
}
for _, license := range m.License {
- cmds = append(cmds, parser.Command{Name: "license", Args: license})
+ modelfile.Commands = append(modelfile.Commands, model.Command{
+ Name: "license",
+ Args: license,
+ })
}
for _, msg := range m.Messages {
- cmds = append(cmds, parser.Command{Name: "message", Args: fmt.Sprintf("%s %s", msg.Role, msg.Content)})
+ modelfile.Commands = append(modelfile.Commands, model.Command{
+ Name: "message",
+ Args: fmt.Sprintf("%s %s", msg.Role, msg.Content),
+ })
}
- return cmds
-
+ return modelfile.String()
}
type Message struct {
@@ -329,7 +357,7 @@ func realpath(mfDir, from string) string {
return abspath
}
-func CreateModel(ctx context.Context, name, modelFileDir, quantization string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
+func CreateModel(ctx context.Context, name, modelFileDir, quantization string, modelfile *model.File, fn func(resp api.ProgressResponse)) error {
deleteMap := make(map[string]struct{})
if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil {
for _, layer := range append(manifest.Layers, manifest.Config) {
@@ -351,7 +379,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
params := make(map[string][]string)
fromParams := make(map[string]any)
- for _, c := range commands {
+ for _, c := range modelfile.Commands {
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
switch c.Name {
@@ -668,7 +696,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
return err
}
- if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
+ if !envconfig.NoPrune {
if err := deleteUnusedLayers(nil, deleteMap, false); err != nil {
return err
}
@@ -999,7 +1027,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
// build deleteMap to prune unused layers
deleteMap := make(map[string]struct{})
- if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
+ if !envconfig.NoPrune {
manifest, _, err = GetManifest(mp)
if err != nil && !errors.Is(err, os.ErrNotExist) {
return err
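With this change `Model.String` rebuilds a Modelfile from the model's metadata by appending `model.Command` values and delegating to `File.String` (defined in `types/model/file.go` further down in this diff). A rough sketch of the output shape, following the same dispatch as `Command.String` but with quoting and MESSAGE handling omitted (values are illustrative):

```go
package main

import (
	"fmt"
	"strings"
)

// command mirrors the shape of model.Command for illustration only.
type command struct{ Name, Args string }

// render follows the dispatch in Command.String: "model" becomes FROM, known
// keywords are upper-cased, and anything else is emitted as a PARAMETER.
func render(cmds []command) string {
	var sb strings.Builder
	for _, c := range cmds {
		switch c.Name {
		case "model":
			fmt.Fprintf(&sb, "FROM %s\n", c.Args)
		case "license", "template", "system", "adapter":
			fmt.Fprintf(&sb, "%s %s\n", strings.ToUpper(c.Name), c.Args)
		default:
			fmt.Fprintf(&sb, "PARAMETER %s %s\n", c.Name, c.Args)
		}
	}
	return sb.String()
}

func main() {
	fmt.Print(render([]command{
		{Name: "model", Args: "/models/llama.gguf"},
		{Name: "temperature", Args: "0.7"},
	}))
	// FROM /models/llama.gguf
	// PARAMETER temperature 0.7
}
```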
diff --git a/server/modelpath.go b/server/modelpath.go
index 7d333876..86908226 100644
--- a/server/modelpath.go
+++ b/server/modelpath.go
@@ -6,6 +6,7 @@ import (
"net/url"
"os"
"path/filepath"
+ "regexp"
"strings"
)
@@ -25,9 +26,10 @@ const (
)
var (
- ErrInvalidImageFormat = errors.New("invalid image format")
- ErrInvalidProtocol = errors.New("invalid protocol scheme")
- ErrInsecureProtocol = errors.New("insecure protocol http")
+ ErrInvalidImageFormat = errors.New("invalid image format")
+ ErrInvalidProtocol = errors.New("invalid protocol scheme")
+ ErrInsecureProtocol = errors.New("insecure protocol http")
+ ErrInvalidDigestFormat = errors.New("invalid digest format")
)
func ParseModelPath(name string) ModelPath {
@@ -149,6 +151,14 @@ func GetBlobsPath(digest string) (string, error) {
return "", err
}
+ // only accept actual sha256 digests
+ pattern := "^sha256[:-][0-9a-fA-F]{64}$"
+ re := regexp.MustCompile(pattern)
+
+ if digest != "" && !re.MatchString(digest) {
+ return "", ErrInvalidDigestFormat
+ }
+
digest = strings.ReplaceAll(digest, ":", "-")
path := filepath.Join(dir, "blobs", digest)
dirPath := filepath.Dir(path)
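The new digest check only constrains the string's shape: both `sha256:<hex>` and `sha256-<hex>` spellings of a 64-character digest are accepted, and anything else (including path traversal attempts) is rejected before the value is joined into a filesystem path. A quick standalone check of the same regular expression:

```go
package main

import (
	"fmt"
	"regexp"
)

var digestRE = regexp.MustCompile("^sha256[:-][0-9a-fA-F]{64}$")

func main() {
	for _, d := range []string{
		"sha256:456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9", // ok, colon form
		"sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9", // ok, dash form
		"sha256-45640291",             // too short, rejected
		"../sha256-456402914e838a953", // traversal attempt, rejected
	} {
		fmt.Println(digestRE.MatchString(d), d)
	}
}
```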
diff --git a/server/modelpath_test.go b/server/modelpath_test.go
index 8b26d52c..30741d87 100644
--- a/server/modelpath_test.go
+++ b/server/modelpath_test.go
@@ -1,6 +1,73 @@
package server
-import "testing"
+import (
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestGetBlobsPath(t *testing.T) {
+ // GetBlobsPath expects an actual directory to exist
+ dir, err := os.MkdirTemp("", "ollama-test")
+ assert.Nil(t, err)
+ defer os.RemoveAll(dir)
+
+ tests := []struct {
+ name string
+ digest string
+ expected string
+ err error
+ }{
+ {
+ "empty digest",
+ "",
+ filepath.Join(dir, "blobs"),
+ nil,
+ },
+ {
+ "valid with colon",
+ "sha256:456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9",
+ filepath.Join(dir, "blobs", "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9"),
+ nil,
+ },
+ {
+ "valid with dash",
+ "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9",
+ filepath.Join(dir, "blobs", "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9"),
+ nil,
+ },
+ {
+ "digest too short",
+ "sha256-45640291",
+ "",
+ ErrInvalidDigestFormat,
+ },
+ {
+ "digest too long",
+ "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9aaaaaaaaaa",
+ "",
+ ErrInvalidDigestFormat,
+ },
+ {
+ "digest invalid chars",
+ "../sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7a",
+ "",
+ ErrInvalidDigestFormat,
+ },
+ }
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Setenv("OLLAMA_MODELS", dir)
+
+ got, err := GetBlobsPath(tc.digest)
+
+ assert.ErrorIs(t, err, tc.err, tc.name)
+ assert.Equal(t, tc.expected, got, tc.name)
+ })
+ }
+}
func TestParseModelPath(t *testing.T) {
tests := []struct {
diff --git a/server/routes.go b/server/routes.go
index 480527f2..e878598a 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1,6 +1,7 @@
package server
import (
+ "cmp"
"context"
"encoding/json"
"errors"
@@ -28,7 +29,7 @@ import (
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/openai"
- "github.com/ollama/ollama/parser"
+ "github.com/ollama/ollama/server/envconfig"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)
@@ -146,12 +147,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
- if errors.Is(err, context.Canceled) {
- c.JSON(499, gin.H{"error": "request canceled"})
- return
- }
-
- c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+ handleErrorResponse(c, err)
return
}
@@ -394,12 +390,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
- if errors.Is(err, context.Canceled) {
- c.JSON(499, gin.H{"error": "request canceled"})
- return
- }
-
- c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+ handleErrorResponse(c, err)
return
}
@@ -522,28 +513,17 @@ func (s *Server) PushModelHandler(c *gin.Context) {
func (s *Server) CreateModelHandler(c *gin.Context) {
var req api.CreateRequest
- err := c.ShouldBindJSON(&req)
- switch {
- case errors.Is(err, io.EOF):
+ if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
return
- case err != nil:
+ } else if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
- var model string
- if req.Model != "" {
- model = req.Model
- } else if req.Name != "" {
- model = req.Name
- } else {
- c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
- return
- }
-
- if err := ParseModelPath(model).Validate(); err != nil {
- c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+ name := model.ParseName(cmp.Or(req.Model, req.Name))
+ if !name.IsValid() {
+ c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
return
}
@@ -552,19 +532,19 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
return
}
- var modelfile io.Reader = strings.NewReader(req.Modelfile)
+ var r io.Reader = strings.NewReader(req.Modelfile)
if req.Path != "" && req.Modelfile == "" {
- mf, err := os.Open(req.Path)
+ f, err := os.Open(req.Path)
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
return
}
- defer mf.Close()
+ defer f.Close()
- modelfile = mf
+ r = f
}
- commands, err := parser.Parse(modelfile)
+ modelfile, err := model.ParseFile(r)
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
@@ -580,7 +560,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
ctx, cancel := context.WithCancel(c.Request.Context())
defer cancel()
- if err := CreateModel(ctx, model, filepath.Dir(req.Path), req.Quantization, commands, fn); err != nil {
+ if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), req.Quantization, modelfile, fn); err != nil {
ch <- gin.H{"error": err.Error()}
}
}()
@@ -732,7 +712,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
fmt.Fprintln(&sb, "# Modelfile generate by \"ollama show\"")
fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
fmt.Fprintf(&sb, "# FROM %s\n\n", model.ShortName)
- fmt.Fprint(&sb, parser.Format(model.Commands()))
+ fmt.Fprint(&sb, model.String())
resp.Modelfile = sb.String()
return resp, nil
@@ -880,12 +860,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
c.Status(http.StatusCreated)
}
-var defaultAllowOrigins = []string{
- "localhost",
- "127.0.0.1",
- "0.0.0.0",
-}
-
func isLocalIP(ip netip.Addr) bool {
if interfaces, err := net.Interfaces(); err == nil {
for _, iface := range interfaces {
@@ -969,19 +943,7 @@ func (s *Server) GenerateRoutes() http.Handler {
config := cors.DefaultConfig()
config.AllowWildcard = true
config.AllowBrowserExtensions = true
-
- if allowedOrigins := strings.Trim(os.Getenv("OLLAMA_ORIGINS"), "\"'"); allowedOrigins != "" {
- config.AllowOrigins = strings.Split(allowedOrigins, ",")
- }
-
- for _, allowOrigin := range defaultAllowOrigins {
- config.AllowOrigins = append(config.AllowOrigins,
- fmt.Sprintf("http://%s", allowOrigin),
- fmt.Sprintf("https://%s", allowOrigin),
- fmt.Sprintf("http://%s:*", allowOrigin),
- fmt.Sprintf("https://%s:*", allowOrigin),
- )
- }
+ config.AllowOrigins = envconfig.AllowOrigins
r := gin.Default()
r.Use(
@@ -1020,10 +982,11 @@ func (s *Server) GenerateRoutes() http.Handler {
func Serve(ln net.Listener) error {
level := slog.LevelInfo
- if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
+ if envconfig.Debug {
level = slog.LevelDebug
}
+ slog.Info("server config", "env", envconfig.AsMap())
handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
Level: level,
AddSource: true,
@@ -1047,7 +1010,7 @@ func Serve(ln net.Listener) error {
return err
}
- if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
+ if !envconfig.NoPrune {
// clean up unused layers and manifests
if err := PruneLayers(); err != nil {
return err
@@ -1223,12 +1186,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
- if errors.Is(err, context.Canceled) {
- c.JSON(499, gin.H{"error": "request canceled"})
- return
- }
-
- c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+ handleErrorResponse(c, err)
return
}
@@ -1349,3 +1307,15 @@ func (s *Server) ChatHandler(c *gin.Context) {
streamResponse(c, ch)
}
+
+func handleErrorResponse(c *gin.Context, err error) {
+ if errors.Is(err, context.Canceled) {
+ c.JSON(499, gin.H{"error": "request canceled"})
+ return
+ }
+ if errors.Is(err, ErrMaxQueue) {
+ c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
+ return
+ }
+ c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+}
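Two small pieces of the standard library carry this hunk: `errors.Is` drives the status mapping in `handleErrorResponse` (499 for a canceled request, 503 for `ErrMaxQueue`, 500 otherwise), and `cmp.Or` from Go 1.22 picks the first non-zero string so `req.Model` takes precedence over the deprecated `req.Name`. A minimal illustration of the `cmp.Or` fallback:

```go
package main

import (
	"cmp"
	"fmt"
)

func main() {
	// cmp.Or returns its first non-zero argument.
	fmt.Println(cmp.Or("llama3", "legacy-name")) // llama3
	fmt.Println(cmp.Or("", "legacy-name"))       // legacy-name
	fmt.Println(cmp.Or("", "") == "")            // true; an empty name then fails name.IsValid()
}
```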
diff --git a/server/routes_test.go b/server/routes_test.go
index 6ac98367..27e53cbd 100644
--- a/server/routes_test.go
+++ b/server/routes_test.go
@@ -17,7 +17,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/ollama/ollama/api"
- "github.com/ollama/ollama/parser"
+ "github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)
@@ -55,13 +55,13 @@ func Test_Routes(t *testing.T) {
createTestModel := func(t *testing.T, name string) {
fname := createTestFile(t, "ollama-model")
- modelfile := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname))
- commands, err := parser.Parse(modelfile)
+ r := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname))
+ modelfile, err := model.ParseFile(r)
assert.Nil(t, err)
fn := func(resp api.ProgressResponse) {
t.Logf("Status: %s", resp.Status)
}
- err = CreateModel(context.TODO(), name, "", "", commands, fn)
+ err = CreateModel(context.TODO(), name, "", "", modelfile, fn)
assert.Nil(t, err)
}
diff --git a/server/sched.go b/server/sched.go
index 61c5e1b3..164814a3 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -5,10 +5,8 @@ import (
"errors"
"fmt"
"log/slog"
- "os"
"reflect"
"sort"
- "strconv"
"strings"
"sync"
"time"
@@ -17,6 +15,7 @@ import (
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/llm"
+ "github.com/ollama/ollama/server/envconfig"
"golang.org/x/exp/slices"
)
@@ -43,35 +42,14 @@ type Scheduler struct {
getGpuFn func() gpu.GpuInfoList
}
-// TODO set this to zero after a release or two, to enable multiple models by default
-var loadedMax = 1 // Maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners)
-var maxQueuedRequests = 10 // TODO configurable
-var numParallel = 1
+var ErrMaxQueue = fmt.Errorf("server busy, please try again. maximum pending requests exceeded")
func InitScheduler(ctx context.Context) *Scheduler {
- maxRunners := os.Getenv("OLLAMA_MAX_LOADED_MODELS")
- if maxRunners != "" {
- m, err := strconv.Atoi(maxRunners)
- if err != nil {
- slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
- } else {
- loadedMax = m
- }
- }
- if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
- p, err := strconv.Atoi(onp)
- if err != nil || p <= 0 {
- slog.Error("invalid parallel setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
- } else {
- numParallel = p
- }
- }
-
sched := &Scheduler{
- pendingReqCh: make(chan *LlmRequest, maxQueuedRequests),
- finishedReqCh: make(chan *LlmRequest, maxQueuedRequests),
- expiredCh: make(chan *runnerRef, maxQueuedRequests),
- unloadedCh: make(chan interface{}, maxQueuedRequests),
+ pendingReqCh: make(chan *LlmRequest, envconfig.MaxQueuedRequests),
+ finishedReqCh: make(chan *LlmRequest, envconfig.MaxQueuedRequests),
+ expiredCh: make(chan *runnerRef, envconfig.MaxQueuedRequests),
+ unloadedCh: make(chan interface{}, envconfig.MaxQueuedRequests),
loaded: make(map[string]*runnerRef),
newServerFn: llm.NewLlamaServer,
getGpuFn: gpu.GetGPUInfo,
@@ -82,6 +60,9 @@ func InitScheduler(ctx context.Context) *Scheduler {
// context must be canceled to decrement ref count and release the runner
func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) {
+ // allocate a large enough kv cache for all parallel requests
+ opts.NumCtx = opts.NumCtx * envconfig.NumParallel
+
req := &LlmRequest{
ctx: c,
model: model,
@@ -90,12 +71,11 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
successCh: make(chan *runnerRef),
errCh: make(chan error, 1),
}
- // context split across parallel threads
- opts.NumCtx = opts.NumCtx * numParallel
+
select {
case s.pendingReqCh <- req:
default:
- req.errCh <- fmt.Errorf("server busy, please try again. maximum pending requests exceeded")
+ req.errCh <- ErrMaxQueue
}
return req.successCh, req.errCh
}
@@ -134,11 +114,11 @@ func (s *Scheduler) processPending(ctx context.Context) {
pending.useLoadedRunner(runner, s.finishedReqCh)
break
}
- } else if loadedMax > 0 && loadedCount >= loadedMax {
+ } else if envconfig.MaxRunners > 0 && loadedCount >= envconfig.MaxRunners {
slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
- runnerToExpire = s.findRunnerToUnload(pending)
+ runnerToExpire = s.findRunnerToUnload()
} else {
- // Either no models are loaded or below loadedMax
+ // Either no models are loaded or below envconfig.MaxRunners
// Get a refreshed GPU list
gpus := s.getGpuFn()
@@ -149,7 +129,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
break
}
- // If we're CPU only mode, just limit by loadedMax above
+ // If we're in CPU-only mode, just limit by envconfig.MaxRunners above
// TODO handle system memory exhaustion
if (len(gpus) == 1 && gpus[0].Library == "cpu") || pending.opts.NumGPU == 0 {
slog.Debug("cpu mode with existing models, loading")
@@ -177,7 +157,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
s.loadFn(pending, ggml, gpus)
break
}
- runnerToExpire = s.findRunnerToUnload(pending)
+ runnerToExpire = s.findRunnerToUnload()
}
if runnerToExpire == nil {
@@ -277,9 +257,9 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
continue
}
+ s.loadedMu.Lock()
slog.Debug("got lock to unload", "model", runner.model)
runner.unload()
- s.loadedMu.Lock()
delete(s.loaded, runner.model)
s.loadedMu.Unlock()
slog.Debug("runner released", "model", runner.model)
@@ -524,7 +504,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
}
// findRunnerToUnload finds a runner to unload to make room for a new model
-func (s *Scheduler) findRunnerToUnload(req *LlmRequest) *runnerRef {
+func (s *Scheduler) findRunnerToUnload() *runnerRef {
s.loadedMu.Lock()
runnerList := make([]*runnerRef, 0, len(s.loaded))
for _, r := range s.loaded {
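Back-pressure in the scheduler now comes from two pieces: `pendingReqCh` is buffered to `envconfig.MaxQueuedRequests`, and `GetRunner` uses a non-blocking send, so a full buffer immediately yields `ErrMaxQueue`, which `handleErrorResponse` turns into HTTP 503. A minimal sketch of that select/default idiom with the types simplified:

```go
package main

import (
	"errors"
	"fmt"
)

var errMaxQueue = errors.New("server busy, please try again. maximum pending requests exceeded")

// enqueue reports an error instead of blocking when the queue is full.
func enqueue(pending chan int, id int) error {
	select {
	case pending <- id:
		return nil
	default:
		return errMaxQueue
	}
}

func main() {
	pending := make(chan int, 2) // stands in for envconfig.MaxQueuedRequests
	for id := 1; id <= 3; id++ {
		if err := enqueue(pending, id); err != nil {
			fmt.Println("request", id, "rejected:", err)
			continue
		}
		fmt.Println("request", id, "queued")
	}
}
```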
diff --git a/server/sched_test.go b/server/sched_test.go
index 32a80674..3e47ed02 100644
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -15,6 +15,7 @@ import (
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/llm"
+ "github.com/ollama/ollama/server/envconfig"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -27,38 +28,14 @@ func init() {
func TestInitScheduler(t *testing.T) {
ctx, done := context.WithCancel(context.Background())
defer done()
- initialMax := loadedMax
- initialParallel := numParallel
s := InitScheduler(ctx)
- require.Equal(t, initialMax, loadedMax)
s.loadedMu.Lock()
require.NotNil(t, s.loaded)
s.loadedMu.Unlock()
-
- os.Setenv("OLLAMA_MAX_LOADED_MODELS", "blue")
- s = InitScheduler(ctx)
- require.Equal(t, initialMax, loadedMax)
- s.loadedMu.Lock()
- require.NotNil(t, s.loaded)
- s.loadedMu.Unlock()
-
- os.Setenv("OLLAMA_MAX_LOADED_MODELS", "0")
- s = InitScheduler(ctx)
- require.Equal(t, 0, loadedMax)
- s.loadedMu.Lock()
- require.NotNil(t, s.loaded)
- s.loadedMu.Unlock()
-
- os.Setenv("OLLAMA_NUM_PARALLEL", "blue")
- _ = InitScheduler(ctx)
- require.Equal(t, initialParallel, numParallel)
- os.Setenv("OLLAMA_NUM_PARALLEL", "10")
- _ = InitScheduler(ctx)
- require.Equal(t, 10, numParallel)
}
func TestLoad(t *testing.T) {
- ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
+ ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond)
defer done()
s := InitScheduler(ctx)
var ggml *llm.GGML // value not used in tests
@@ -174,7 +151,7 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
}
func TestRequests(t *testing.T) {
- ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
+ ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond)
defer done()
// Same model, same request
@@ -249,7 +226,7 @@ func TestRequests(t *testing.T) {
t.Errorf("timeout")
}
- loadedMax = 1
+ envconfig.MaxRunners = 1
s.newServerFn = scenario3a.newServer
slog.Info("scenario3a")
s.pendingReqCh <- scenario3a.req
@@ -268,7 +245,7 @@ func TestRequests(t *testing.T) {
require.Len(t, s.loaded, 1)
s.loadedMu.Unlock()
- loadedMax = 0
+ envconfig.MaxRunners = 0
s.newServerFn = scenario3b.newServer
slog.Info("scenario3b")
s.pendingReqCh <- scenario3b.req
@@ -329,7 +306,7 @@ func TestRequests(t *testing.T) {
}
func TestGetRunner(t *testing.T) {
- ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond)
+ ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer done()
// Same model, same request
@@ -339,7 +316,7 @@ func TestGetRunner(t *testing.T) {
scenario1b.req.sessionDuration = 0
scenario1c := newScenario(t, ctx, "ollama-model-1c", 10)
scenario1c.req.sessionDuration = 0
- maxQueuedRequests = 1
+ envconfig.MaxQueuedRequests = 1
s := InitScheduler(ctx)
s.getGpuFn = func() gpu.GpuInfoList {
g := gpu.GpuInfo{Library: "metal"}
@@ -391,7 +368,7 @@ func TestGetRunner(t *testing.T) {
// TODO - add one scenario that triggers the bogus finished event with positive ref count
func TestPrematureExpired(t *testing.T) {
- ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
+ ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond)
defer done()
// Same model, same request
@@ -436,7 +413,7 @@ func TestPrematureExpired(t *testing.T) {
}
func TestUseLoadedRunner(t *testing.T) {
- ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
+ ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
req := &LlmRequest{
ctx: ctx,
opts: api.DefaultOptions(),
@@ -461,7 +438,7 @@ func TestUseLoadedRunner(t *testing.T) {
}
func TestUpdateFreeSpace(t *testing.T) {
- ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
+ ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer done()
gpus := gpu.GpuInfoList{
{
@@ -494,12 +471,9 @@ func TestUpdateFreeSpace(t *testing.T) {
}
func TestFindRunnerToUnload(t *testing.T) {
- ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
+ ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer done()
- req := &LlmRequest{
- ctx: ctx,
- opts: api.DefaultOptions(),
- }
+
r1 := &runnerRef{refCount: 1, sessionDuration: 1}
r2 := &runnerRef{sessionDuration: 2}
@@ -509,16 +483,16 @@ func TestFindRunnerToUnload(t *testing.T) {
s.loaded["b"] = r2
s.loadedMu.Unlock()
- resp := s.findRunnerToUnload(req)
+ resp := s.findRunnerToUnload()
require.Equal(t, r2, resp)
r2.refCount = 1
- resp = s.findRunnerToUnload(req)
+ resp = s.findRunnerToUnload()
require.Equal(t, r1, resp)
}
func TestNeedsReload(t *testing.T) {
- ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
+ ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer done()
llm := &mockLlm{}
@@ -562,7 +536,7 @@ func TestNeedsReload(t *testing.T) {
}
func TestUnloadAllRunners(t *testing.T) {
- ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
+ ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer done()
llm1 := &mockLlm{}
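Since the limits are now plain package-level variables, the tests above override them directly (for example `envconfig.MaxRunners = 1` and `envconfig.MaxQueuedRequests = 1`) without restoring the previous values. A hedged sketch of a helper that new tests could use to keep cases independent (the helper is illustrative and not part of this change):

```go
package server

import (
	"testing"

	"github.com/ollama/ollama/server/envconfig"
)

// withMaxRunners overrides envconfig.MaxRunners for one test and restores
// the previous value when the test finishes.
func withMaxRunners(t *testing.T, n int) {
	t.Helper()
	prev := envconfig.MaxRunners
	envconfig.MaxRunners = n
	t.Cleanup(func() { envconfig.MaxRunners = prev })
}
```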
diff --git a/parser/parser.go b/types/model/file.go
similarity index 86%
rename from parser/parser.go
rename to types/model/file.go
index 9d1f3388..c614fd32 100644
--- a/parser/parser.go
+++ b/types/model/file.go
@@ -1,4 +1,4 @@
-package parser
+package model
import (
"bufio"
@@ -10,11 +10,41 @@ import (
"strings"
)
+type File struct {
+ Commands []Command
+}
+
+func (f File) String() string {
+ var sb strings.Builder
+ for _, cmd := range f.Commands {
+ fmt.Fprintln(&sb, cmd.String())
+ }
+
+ return sb.String()
+}
+
type Command struct {
Name string
Args string
}
+func (c Command) String() string {
+ var sb strings.Builder
+ switch c.Name {
+ case "model":
+ fmt.Fprintf(&sb, "FROM %s", c.Args)
+ case "license", "template", "system", "adapter":
+ fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
+ case "message":
+ role, message, _ := strings.Cut(c.Args, ": ")
+ fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
+ default:
+ fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
+ }
+
+ return sb.String()
+}
+
type state int
const (
@@ -32,38 +62,14 @@ var (
errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
)
-func Format(cmds []Command) string {
- var sb strings.Builder
- for _, cmd := range cmds {
- name := cmd.Name
- args := cmd.Args
-
- switch cmd.Name {
- case "model":
- name = "from"
- args = cmd.Args
- case "license", "template", "system", "adapter":
- args = quote(args)
- case "message":
- role, message, _ := strings.Cut(cmd.Args, ": ")
- args = role + " " + quote(message)
- default:
- name = "parameter"
- args = cmd.Name + " " + quote(cmd.Args)
- }
-
- fmt.Fprintln(&sb, strings.ToUpper(name), args)
- }
-
- return sb.String()
-}
-
-func Parse(r io.Reader) (cmds []Command, err error) {
+func ParseFile(r io.Reader) (*File, error) {
var cmd Command
var curr state
var b bytes.Buffer
var role string
+ var f File
+
br := bufio.NewReader(r)
for {
r, _, err := br.ReadRune()
@@ -128,7 +134,7 @@ func Parse(r io.Reader) (cmds []Command, err error) {
}
cmd.Args = s
- cmds = append(cmds, cmd)
+ f.Commands = append(f.Commands, cmd)
}
b.Reset()
@@ -157,14 +163,14 @@ func Parse(r io.Reader) (cmds []Command, err error) {
}
cmd.Args = s
- cmds = append(cmds, cmd)
+ f.Commands = append(f.Commands, cmd)
default:
return nil, io.ErrUnexpectedEOF
}
- for _, cmd := range cmds {
+ for _, cmd := range f.Commands {
if cmd.Name == "model" {
- return cmds, nil
+ return &f, nil
}
}
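The rename keeps the parsing logic; only the entry points change: `parser.Parse` becomes `model.ParseFile`, returning a `*File` whose `String` method re-emits a Modelfile. A hedged usage sketch against the new API (the import path is taken from this diff):

```go
package main

import (
	"fmt"
	"strings"

	"github.com/ollama/ollama/types/model"
)

func main() {
	f, err := model.ParseFile(strings.NewReader("FROM foo\nPARAMETER temperature 0.7\n"))
	if err != nil {
		panic(err)
	}
	for _, c := range f.Commands {
		fmt.Printf("%s => %s\n", c.Name, c.Args) // model => foo, then temperature => 0.7
	}
	// Serialise back out; ParseFile(strings.NewReader(f.String())) should yield the same commands.
	fmt.Print(f.String())
}
```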
diff --git a/parser/parser_test.go b/types/model/file_test.go
similarity index 80%
rename from parser/parser_test.go
rename to types/model/file_test.go
index a28205aa..d51c8d70 100644
--- a/parser/parser_test.go
+++ b/types/model/file_test.go
@@ -1,4 +1,4 @@
-package parser
+package model
import (
"bytes"
@@ -10,7 +10,7 @@ import (
"github.com/stretchr/testify/assert"
)
-func TestParser(t *testing.T) {
+func TestParseFile(t *testing.T) {
input := `
FROM model1
ADAPTER adapter1
@@ -22,8 +22,8 @@ TEMPLATE template1
reader := strings.NewReader(input)
- commands, err := Parse(reader)
- assert.Nil(t, err)
+ modelfile, err := ParseFile(reader)
+ assert.NoError(t, err)
expectedCommands := []Command{
{Name: "model", Args: "model1"},
@@ -34,10 +34,10 @@ TEMPLATE template1
{Name: "template", Args: "template1"},
}
- assert.Equal(t, expectedCommands, commands)
+ assert.Equal(t, expectedCommands, modelfile.Commands)
}
-func TestParserFrom(t *testing.T) {
+func TestParseFileFrom(t *testing.T) {
var cases = []struct {
input string
expected []Command
@@ -85,14 +85,16 @@ func TestParserFrom(t *testing.T) {
for _, c := range cases {
t.Run("", func(t *testing.T) {
- commands, err := Parse(strings.NewReader(c.input))
+ modelfile, err := ParseFile(strings.NewReader(c.input))
assert.ErrorIs(t, err, c.err)
- assert.Equal(t, c.expected, commands)
+ if modelfile != nil {
+ assert.Equal(t, c.expected, modelfile.Commands)
+ }
})
}
}
-func TestParserParametersMissingValue(t *testing.T) {
+func TestParseFileParametersMissingValue(t *testing.T) {
input := `
FROM foo
PARAMETER param1
@@ -100,21 +102,21 @@ PARAMETER param1
reader := strings.NewReader(input)
- _, err := Parse(reader)
+ _, err := ParseFile(reader)
assert.ErrorIs(t, err, io.ErrUnexpectedEOF)
}
-func TestParserBadCommand(t *testing.T) {
+func TestParseFileBadCommand(t *testing.T) {
input := `
FROM foo
BADCOMMAND param1 value1
`
- _, err := Parse(strings.NewReader(input))
+ _, err := ParseFile(strings.NewReader(input))
assert.ErrorIs(t, err, errInvalidCommand)
}
-func TestParserMessages(t *testing.T) {
+func TestParseFileMessages(t *testing.T) {
var cases = []struct {
input string
expected []Command
@@ -123,34 +125,34 @@ func TestParserMessages(t *testing.T) {
{
`
FROM foo
-MESSAGE system You are a Parser. Always Parse things.
+MESSAGE system You are a file parser. Always parse things.
`,
[]Command{
{Name: "model", Args: "foo"},
- {Name: "message", Args: "system: You are a Parser. Always Parse things."},
+ {Name: "message", Args: "system: You are a file parser. Always parse things."},
},
nil,
},
{
`
FROM foo
-MESSAGE system You are a Parser. Always Parse things.`,
+MESSAGE system You are a file parser. Always parse things.`,
[]Command{
{Name: "model", Args: "foo"},
- {Name: "message", Args: "system: You are a Parser. Always Parse things."},
+ {Name: "message", Args: "system: You are a file parser. Always parse things."},
},
nil,
},
{
`
FROM foo
-MESSAGE system You are a Parser. Always Parse things.
+MESSAGE system You are a file parser. Always parse things.
MESSAGE user Hey there!
MESSAGE assistant Hello, I want to parse all the things!
`,
[]Command{
{Name: "model", Args: "foo"},
- {Name: "message", Args: "system: You are a Parser. Always Parse things."},
+ {Name: "message", Args: "system: You are a file parser. Always parse things."},
{Name: "message", Args: "user: Hey there!"},
{Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
},
@@ -160,12 +162,12 @@ MESSAGE assistant Hello, I want to parse all the things!
`
FROM foo
MESSAGE system """
-You are a multiline Parser. Always Parse things.
+You are a multiline file parser. Always parse things.
"""
`,
[]Command{
{Name: "model", Args: "foo"},
- {Name: "message", Args: "system: \nYou are a multiline Parser. Always Parse things.\n"},
+ {Name: "message", Args: "system: \nYou are a multiline file parser. Always parse things.\n"},
},
nil,
},
@@ -196,14 +198,16 @@ MESSAGE system`,
for _, c := range cases {
t.Run("", func(t *testing.T) {
- commands, err := Parse(strings.NewReader(c.input))
+ modelfile, err := ParseFile(strings.NewReader(c.input))
assert.ErrorIs(t, err, c.err)
- assert.Equal(t, c.expected, commands)
+ if modelfile != nil {
+ assert.Equal(t, c.expected, modelfile.Commands)
+ }
})
}
}
-func TestParserQuoted(t *testing.T) {
+func TestParseFileQuoted(t *testing.T) {
var cases = []struct {
multiline string
expected []Command
@@ -348,14 +352,16 @@ TEMPLATE """
for _, c := range cases {
t.Run("", func(t *testing.T) {
- commands, err := Parse(strings.NewReader(c.multiline))
+ modelfile, err := ParseFile(strings.NewReader(c.multiline))
assert.ErrorIs(t, err, c.err)
- assert.Equal(t, c.expected, commands)
+ if modelfile != nil {
+ assert.Equal(t, c.expected, modelfile.Commands)
+ }
})
}
}
-func TestParserParameters(t *testing.T) {
+func TestParseFileParameters(t *testing.T) {
var cases = map[string]struct {
name, value string
}{
@@ -404,18 +410,18 @@ func TestParserParameters(t *testing.T) {
var b bytes.Buffer
fmt.Fprintln(&b, "FROM foo")
fmt.Fprintln(&b, "PARAMETER", k)
- commands, err := Parse(&b)
- assert.Nil(t, err)
+ modelfile, err := ParseFile(&b)
+ assert.NoError(t, err)
assert.Equal(t, []Command{
{Name: "model", Args: "foo"},
{Name: v.name, Args: v.value},
- }, commands)
+ }, modelfile.Commands)
})
}
}
-func TestParserComments(t *testing.T) {
+func TestParseFileComments(t *testing.T) {
var cases = []struct {
input string
expected []Command
@@ -433,14 +439,14 @@ FROM foo
for _, c := range cases {
t.Run("", func(t *testing.T) {
- commands, err := Parse(strings.NewReader(c.input))
- assert.Nil(t, err)
- assert.Equal(t, c.expected, commands)
+ modelfile, err := ParseFile(strings.NewReader(c.input))
+ assert.NoError(t, err)
+ assert.Equal(t, c.expected, modelfile.Commands)
})
}
}
-func TestParseFormatParse(t *testing.T) {
+func TestParseFileRoundTrip(t *testing.T) {
var cases = []string{
`
FROM foo
@@ -449,7 +455,7 @@ LICENSE MIT
PARAMETER param1 value1
PARAMETER param2 value2
TEMPLATE template1
-MESSAGE system You are a Parser. Always Parse things.
+MESSAGE system You are a file parser. Always parse things.
MESSAGE user Hey there!
MESSAGE assistant Hello, I want to parse all the things!
`,
@@ -488,13 +494,13 @@ MESSAGE assistant Hello, I want to parse all the things!
for _, c := range cases {
t.Run("", func(t *testing.T) {
- commands, err := Parse(strings.NewReader(c))
+ modelfile, err := ParseFile(strings.NewReader(c))
assert.NoError(t, err)
- commands2, err := Parse(strings.NewReader(Format(commands)))
+ modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
assert.NoError(t, err)
- assert.Equal(t, commands, commands2)
+ assert.Equal(t, modelfile, modelfile2)
})
}
diff --git a/types/model/name.go b/types/model/name.go
index cb890b3a..fbb30fd4 100644
--- a/types/model/name.go
+++ b/types/model/name.go
@@ -161,7 +161,7 @@ func ParseNameBare(s string) Name {
}
scheme, host, ok := strings.Cut(s, "://")
- if ! ok {
+ if !ok {
host = scheme
}
n.Host = host
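For context on the `!ok` branch: `strings.Cut` only reports `ok` when the separator is present; otherwise the first return value is the entire input and the second is empty, so `host = scheme` keeps scheme-less names intact. A quick illustration:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	scheme, host, ok := strings.Cut("https://registry.ollama.ai/library/llama3", "://")
	fmt.Println(scheme, host, ok) // https registry.ollama.ai/library/llama3 true

	scheme, host, ok = strings.Cut("registry.ollama.ai/library/llama3", "://")
	if !ok {
		host = scheme // no scheme: Cut returned the whole input as "scheme"
	}
	fmt.Println(host, ok) // registry.ollama.ai/library/llama3 false
}
```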