diff --git a/README.md b/README.md index 10623107..a9601ec2 100644 --- a/README.md +++ b/README.md @@ -344,6 +344,7 @@ See the [API documentation](./docs/api.md) for all endpoints. - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa) - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example) - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java) +- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs) - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html) - [LiteLLM](https://github.com/BerriAI/litellm) - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp) @@ -399,6 +400,7 @@ See the [API documentation](./docs/api.md) for all endpoints. - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support) - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation) - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities. 
+- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depend on the ollama server) ### Supported backends diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp index 8a0dffea..7d14e48e 100644 --- a/llm/ext_server/server.cpp +++ b/llm/ext_server/server.cpp @@ -835,7 +835,7 @@ struct llama_server_context system_tokens.clear(); if (!system_prompt.empty()) { - system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token); + system_tokens = ::llama_tokenize(ctx, system_prompt, true); llama_batch_clear(batch); @@ -1656,7 +1656,7 @@ struct llama_server_context slot.t_start_process_prompt = ggml_time_us(); slot.t_start_genereration = 0; - prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token); // add BOS if there isn't system prompt + prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt slot.n_prompt_tokens = prompt_tokens.size(); diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index 93539bf6..dbf06c19 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -220,7 +220,7 @@ if [ -z "${ONEAPI_ROOT}" ]; then ONEAPI_ROOT=/opt/intel/oneapi fi -if [ -d "${ONEAPI_ROOT}" ]; then +if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then echo "OneAPI libraries detected - building dynamic OneAPI library" init_vars source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index b2dc38e6..afe9f4ea 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -303,7 +303,7 @@ function build_cuda() { } function build_oneapi() { - if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${env:ONEAPI_ROOT}")) { + if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}")) { # Get oneAPI version $script:ONEAPI_VERSION = icpx 
--version $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?\d+\.\d+\.\d+)').Value diff --git a/llm/gguf.go b/llm/gguf.go index ca7e340d..234efe57 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -618,22 +618,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error { } } - offset, err := ws.Seek(0, io.SeekCurrent) - if err != nil { - return err - } - var alignment int64 = 32 - padding := llm.padding(offset, alignment) - if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil { - return err - } - for _, tensor := range tensors { - if _, err := tensor.WriteTo(ws); err != nil { - return err - } - offset, err := ws.Seek(0, io.SeekCurrent) if err != nil { return err @@ -643,6 +629,10 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error { if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil { return err } + + if _, err := tensor.WriteTo(ws); err != nil { + return err + } } return nil diff --git a/server/images.go b/server/images.go index 32207f20..683057b8 100644 --- a/server/images.go +++ b/server/images.go @@ -437,18 +437,17 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture()) if s := baseLayer.GGML.KV().ChatTemplate(); s != "" { - t, err := templates.NamedTemplate(s) - if err != nil { - return err - } + if t, err := templates.NamedTemplate(s); err != nil { + slog.Debug("template detection", "error", err) + } else { + layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template") + if err != nil { + return err + } - layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template") - if err != nil { - return err + layer.status = fmt.Sprintf("using autodetected template %s", t.Name) + layers = append(layers, layer) } - - layer.status = fmt.Sprintf("using 
autodetected template %s", t.Name) - layers = append(layers, layer) } } diff --git a/server/routes_create_test.go b/server/routes_create_test.go index 19bf19ed..0fc76b96 100644 --- a/server/routes_create_test.go +++ b/server/routes_create_test.go @@ -15,11 +15,12 @@ import ( "github.com/gin-gonic/gin" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/llm" ) var stream bool = false -func createBinFile(t *testing.T) string { +func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string { t.Helper() f, err := os.CreateTemp(t.TempDir(), "") @@ -28,19 +29,7 @@ func createBinFile(t *testing.T) string { } defer f.Close() - if err := binary.Write(f, binary.LittleEndian, []byte("GGUF")); err != nil { - t.Fatal(err) - } - - if err := binary.Write(f, binary.LittleEndian, uint32(3)); err != nil { - t.Fatal(err) - } - - if err := binary.Write(f, binary.LittleEndian, uint64(0)); err != nil { - t.Fatal(err) - } - - if err := binary.Write(f, binary.LittleEndian, uint64(0)); err != nil { + if err := llm.NewGGUFV3(binary.LittleEndian).Encode(f, kv, ti); err != nil { t.Fatal(err) } @@ -101,7 +90,7 @@ func TestCreateFromBin(t *testing.T) { var s Server w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), Stream: &stream, }) @@ -126,7 +115,7 @@ func TestCreateFromModel(t *testing.T) { w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), Stream: &stream, }) @@ -166,7 +155,7 @@ func TestCreateRemovesLayers(t *testing.T) { w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t, nil, nil)), Stream: &stream, }) 
@@ -186,7 +175,7 @@ func TestCreateRemovesLayers(t *testing.T) { w = createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)), Stream: &stream, }) @@ -212,7 +201,7 @@ func TestCreateUnsetsSystem(t *testing.T) { w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t, nil, nil)), Stream: &stream, }) @@ -232,7 +221,7 @@ func TestCreateUnsetsSystem(t *testing.T) { w = createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t, nil, nil)), Stream: &stream, }) @@ -267,7 +256,7 @@ func TestCreateMergeParameters(t *testing.T) { w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t, nil, nil)), Stream: &stream, }) @@ -369,7 +358,7 @@ func TestCreateReplacesMessages(t *testing.T) { w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t, nil, nil)), Stream: &stream, }) @@ -444,7 +433,7 @@ func 
TestCreateTemplateSystem(t *testing.T) { w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t, nil, nil)), Stream: &stream, }) @@ -489,7 +478,7 @@ func TestCreateLicenses(t *testing.T) { w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t, nil, nil)), Stream: &stream, }) @@ -526,3 +515,46 @@ func TestCreateLicenses(t *testing.T) { t.Errorf("expected Apache-2.0, actual %s", apache) } } + +func TestCreateDetectTemplate(t *testing.T) { + p := t.TempDir() + t.Setenv("OLLAMA_MODELS", p) + var s Server + + t.Run("matched", func(t *testing.T) { + w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ + Name: "test", + Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{ + "tokenizer.chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}", + }, nil)), + Stream: &stream, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected status code 200, actual %d", w.Code) + } + + checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ + filepath.Join(p, "blobs", "sha256-06cd2687a518d624073f125f1db1c5c727f77c75e84a138fe745186dbbbb4cd7"), + filepath.Join(p, "blobs", "sha256-542b217f179c7825eeb5bca3c77d2b75ed05bafbd3451d9188891a60a85337c6"), + filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"), + }) + }) + + 
t.Run("unmatched", func(t *testing.T) { + w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ + Name: "test", + Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), + Stream: &stream, + }) + + if w.Code != http.StatusOK { + t.Fatalf("expected status code 200, actual %d", w.Code) + } + + checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ + filepath.Join(p, "blobs", "sha256-a4e5e156ddec27e286f75328784d7106b60a4eb1d246e950a001a3f944fbda99"), + filepath.Join(p, "blobs", "sha256-ca239d7bd8ea90e4a5d2e6bf88f8d74a47b14336e73eb4e18bed4dd325018116"), + }) + }) +} diff --git a/server/routes_delete_test.go b/server/routes_delete_test.go index ea098d05..d0990009 100644 --- a/server/routes_delete_test.go +++ b/server/routes_delete_test.go @@ -16,7 +16,7 @@ func TestDelete(t *testing.T) { w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test", - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), }) if w.Code != http.StatusOK { @@ -25,7 +25,7 @@ func TestDelete(t *testing.T) { w = createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: "test2", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)), }) if w.Code != http.StatusOK { diff --git a/server/routes_list_test.go b/server/routes_list_test.go index e92b4eab..97bf8b8f 100644 --- a/server/routes_list_test.go +++ b/server/routes_list_test.go @@ -29,7 +29,7 @@ func TestList(t *testing.T) { for _, n := range expectNames { createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: n, - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), }) } diff --git a/server/routes_test.go b/server/routes_test.go index 91ef625b..4e9cfc2a 100644 --- a/server/routes_test.go +++ b/server/routes_test.go @@ -261,7 
+261,7 @@ func TestCase(t *testing.T) { t.Run(tt, func(t *testing.T) { w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: tt, - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), Stream: &stream, }) @@ -277,7 +277,7 @@ func TestCase(t *testing.T) { t.Run("create", func(t *testing.T) { w = createRequest(t, s.CreateModelHandler, api.CreateRequest{ Name: strings.ToUpper(tt), - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)), + Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), Stream: &stream, }) diff --git a/templates/template.go b/templates/template.go index 87962695..72bd69e9 100644 --- a/templates/template.go +++ b/templates/template.go @@ -30,7 +30,8 @@ var templatesOnce = sync.OnceValues(func() ([]*Template, error) { return nil, err } - t.Bytes = bts + // normalize line endings + t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n")) } return templates, nil