Merge branch 'ollama:main' into main

Author: likelovewant
Date: 2024-06-09 10:10:23 +08:00
Committed by: GitHub
11 changed files with 82 additions and 58 deletions

View File

@@ -344,6 +344,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
+- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
@@ -399,6 +400,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
 - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in Python. Uses Ollama to create personalities.
+- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install the ollama client & models on any OS for apps that depend on the ollama server)

 ### Supported backends

View File

@@ -835,7 +835,7 @@ struct llama_server_context
             system_tokens.clear();

             if (!system_prompt.empty()) {
-                system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
+                system_tokens = ::llama_tokenize(ctx, system_prompt, true);

                 llama_batch_clear(batch);
@@ -1656,7 +1656,7 @@ struct llama_server_context
                     slot.t_start_process_prompt = ggml_time_us();
                     slot.t_start_genereration = 0;

-                    prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token);  // add BOS if there isn't system prompt
+                    prompt_tokens = tokenize(slot.prompt, system_prompt.empty());  // add BOS if there isn't system prompt
                     slot.n_prompt_tokens = prompt_tokens.size();
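
Note: taken together, the two server.cpp hunks above keep exactly one BOS token in the assembled prompt, independent of the model's add_bos_token metadata: the system prompt, when present, always carries it, and the slot prompt adds it only when no system prompt exists. A rough Go sketch of that invariant (an illustration of the logic only, not the C++ code):

// Sketch: exactly one of systemPrompt / slotPrompt contributes the BOS token.
func promptTokens(tok func(s string, addBOS bool) []int, systemPrompt, slotPrompt string) []int {
	var tokens []int
	if systemPrompt != "" {
		// after this change, BOS is always prepended to the system prompt
		tokens = append(tokens, tok(systemPrompt, true)...)
	}
	// add BOS to the slot prompt only when there is no system prompt
	return append(tokens, tok(slotPrompt, systemPrompt == "")...)
}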

View File

@@ -220,7 +220,7 @@ if [ -z "${ONEAPI_ROOT}" ]; then
     ONEAPI_ROOT=/opt/intel/oneapi
 fi

-if [ -d "${ONEAPI_ROOT}" ]; then
+if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
     echo "OneAPI libraries detected - building dynamic OneAPI library"
     init_vars
     source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI

View File

@@ -303,7 +303,7 @@ function build_cuda() {
 }

 function build_oneapi() {
-    if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${env:ONEAPI_ROOT}")) {
+    if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}")) {
         # Get oneAPI version
         $script:ONEAPI_VERSION = icpx --version
         $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?<version>\d+\.\d+\.\d+)').Value
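
Note: both build-script hunks enforce the same guard, generating the oneAPI runner only when OLLAMA_SKIP_ONEAPI_GENERATE is unset and the toolkit is present; the PowerShell version had been testing the CUDA skip flag by mistake. A hedged Go rendering of the intended predicate (a cross-language illustration, not part of either script):

package main

import (
	"fmt"
	"os"
)

// shouldBuildOneAPI mirrors the scripts' guard: the skip flag must be
// unset and the oneAPI root directory must exist.
func shouldBuildOneAPI() bool {
	if os.Getenv("OLLAMA_SKIP_ONEAPI_GENERATE") != "" {
		return false
	}
	root := os.Getenv("ONEAPI_ROOT")
	if root == "" {
		root = "/opt/intel/oneapi" // default used by gen_linux.sh
	}
	info, err := os.Stat(root)
	return err == nil && info.IsDir()
}

func main() {
	fmt.Println("build oneAPI runner:", shouldBuildOneAPI())
}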

View File

@@ -618,22 +618,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 		}
 	}

-	offset, err := ws.Seek(0, io.SeekCurrent)
-	if err != nil {
-		return err
-	}
-
 	var alignment int64 = 32
-	padding := llm.padding(offset, alignment)
-	if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
-		return err
-	}
-
 	for _, tensor := range tensors {
-		if _, err := tensor.WriteTo(ws); err != nil {
-			return err
-		}
-
 		offset, err := ws.Seek(0, io.SeekCurrent)
 		if err != nil {
 			return err
@@ -643,6 +629,10 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 		if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
 			return err
 		}
+
+		if _, err := tensor.WriteTo(ws); err != nil {
+			return err
+		}
 	}

 	return nil
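
Note: the Encode change moves the alignment padding inside the tensor loop, so every tensor payload (not only the first) begins on a 32-byte boundary before it is written. A minimal sketch of the per-tensor write, assuming a padding helper like the llm.padding referenced above (simplified names, not the repository's exact code):

package gguf

import (
	"bytes"
	"encoding/binary"
	"io"
)

// padding returns the number of zero bytes needed to reach the next
// multiple of align (0 if offset is already aligned); one plausible
// definition of the helper used above.
func padding(offset, align int64) int64 {
	return (align - offset%align) % align
}

// writeTensorAligned pads the stream to the alignment boundary, then
// writes one tensor's payload.
func writeTensorAligned(ws io.WriteSeeker, order binary.ByteOrder, payload []byte, align int64) error {
	offset, err := ws.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}
	if err := binary.Write(ws, order, bytes.Repeat([]byte{0}, int(padding(offset, align)))); err != nil {
		return err
	}
	_, err = ws.Write(payload)
	return err
}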

View File

@@ -437,18 +437,17 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 		config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())

 		if s := baseLayer.GGML.KV().ChatTemplate(); s != "" {
-			t, err := templates.NamedTemplate(s)
-			if err != nil {
-				return err
-			}
+			if t, err := templates.NamedTemplate(s); err != nil {
+				slog.Debug("template detection", "error", err)
+			} else {
+				layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
+				if err != nil {
+					return err
+				}

-			layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
-			if err != nil {
-				return err
+				layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
+				layers = append(layers, layer)
 			}
-
-			layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
-			layers = append(layers, layer)
 		}
 	}
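
Note: after this rewrite, an unrecognized chat template is logged at debug level and skipped rather than failing the whole CreateModel call; only a matched template becomes an application/vnd.ollama.image.template layer. templates.NamedTemplate presumably maps the model's raw tokenizer.chat_template string to one of the known named templates; a hypothetical sketch of such a lookup (the real implementation may well differ):

package templates

import (
	"errors"
	"strings"
)

// Template pairs a known template's name with its contents.
type Template struct {
	Name  string
	Bytes []byte
}

// known is illustrative data only; the real set ships with the repository.
var known = []Template{
	{Name: "chatml", Bytes: []byte("...")},
	{Name: "llama2-chat", Bytes: []byte("...")},
}

// NamedTemplate returns the known template whose contents match the
// model's embedded chat template, or an error when none does.
func NamedTemplate(s string) (*Template, error) {
	s = strings.ReplaceAll(s, "\r\n", "\n") // cf. the template.go hunk below
	for i := range known {
		if strings.TrimSpace(string(known[i].Bytes)) == strings.TrimSpace(s) {
			return &known[i], nil
		}
	}
	return nil, errors.New("no matching named template")
}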

View File

@@ -15,11 +15,12 @@ import (
"github.com/gin-gonic/gin"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm"
)
var stream bool = false
func createBinFile(t *testing.T) string {
func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
t.Helper()
f, err := os.CreateTemp(t.TempDir(), "")
@@ -28,19 +29,7 @@ func createBinFile(t *testing.T) string {
 	}
 	defer f.Close()

-	if err := binary.Write(f, binary.LittleEndian, []byte("GGUF")); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := binary.Write(f, binary.LittleEndian, uint32(3)); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := binary.Write(f, binary.LittleEndian, uint64(0)); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := binary.Write(f, binary.LittleEndian, uint64(0)); err != nil {
+	if err := llm.NewGGUFV3(binary.LittleEndian).Encode(f, kv, ti); err != nil {
 		t.Fatal(err)
 	}
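
Note: by delegating to llm.NewGGUFV3(binary.LittleEndian).Encode, the helper still produces a minimal GGUF (magic, version 3, empty metadata) when called as createBinFile(t, nil, nil), while letting tests inject KV metadata and tensors. A usage sketch mirroring the detection test added below (hypothetical test name, same helpers as the surrounding file):

func TestCreateWithChatTemplate(t *testing.T) { // hypothetical
	var s Server
	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
		Name: "test",
		// kv carries a tokenizer.chat_template for the server to detect
		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
			"tokenizer.chat_template": "{% for m in messages %}{{ m['content'] }}{% endfor %}",
		}, nil)),
		Stream: &stream,
	})
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d", w.Code)
	}
}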
@@ -101,7 +90,7 @@ func TestCreateFromBin(t *testing.T) {
 	var s Server
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 		Stream:    &stream,
 	})
@@ -126,7 +115,7 @@ func TestCreateFromModel(t *testing.T) {
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 		Stream:    &stream,
 	})
@@ -166,7 +155,7 @@ func TestCreateRemovesLayers(t *testing.T) {
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t, nil, nil)),
 		Stream:    &stream,
 	})
@@ -186,7 +175,7 @@ func TestCreateRemovesLayers(t *testing.T) {
 	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
 		Stream:    &stream,
 	})
@@ -212,7 +201,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t, nil, nil)),
 		Stream:    &stream,
 	})
@@ -232,7 +221,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
 	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t, nil, nil)),
 		Stream:    &stream,
 	})
@@ -267,7 +256,7 @@ func TestCreateMergeParameters(t *testing.T) {
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t, nil, nil)),
 		Stream:    &stream,
 	})
@@ -369,7 +358,7 @@ func TestCreateReplacesMessages(t *testing.T) {
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t, nil, nil)),
 		Stream:    &stream,
 	})
@@ -444,7 +433,7 @@ func TestCreateTemplateSystem(t *testing.T) {
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t, nil, nil)),
 		Stream:    &stream,
 	})
@@ -489,7 +478,7 @@ func TestCreateLicenses(t *testing.T) {
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t, nil, nil)),
 		Stream:    &stream,
 	})
@@ -526,3 +515,46 @@ func TestCreateLicenses(t *testing.T) {
t.Errorf("expected Apache-2.0, actual %s", apache)
}
}
func TestCreateDetectTemplate(t *testing.T) {
p := t.TempDir()
t.Setenv("OLLAMA_MODELS", p)
var s Server
t.Run("matched", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
"tokenizer.chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
}, nil)),
Stream: &stream,
})
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-06cd2687a518d624073f125f1db1c5c727f77c75e84a138fe745186dbbbb4cd7"),
filepath.Join(p, "blobs", "sha256-542b217f179c7825eeb5bca3c77d2b75ed05bafbd3451d9188891a60a85337c6"),
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
})
})
t.Run("unmatched", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
})
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-a4e5e156ddec27e286f75328784d7106b60a4eb1d246e950a001a3f944fbda99"),
filepath.Join(p, "blobs", "sha256-ca239d7bd8ea90e4a5d2e6bf88f8d74a47b14336e73eb4e18bed4dd325018116"),
})
})
}

View File

@@ -16,7 +16,7 @@ func TestDelete(t *testing.T) {
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
-		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 	})

 	if w.Code != http.StatusOK {
@@ -25,7 +25,7 @@ func TestDelete(t *testing.T) {
 	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test2",
-		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t)),
+		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
 	})

 	if w.Code != http.StatusOK {

View File

@@ -29,7 +29,7 @@ func TestList(t *testing.T) {
 	for _, n := range expectNames {
 		createRequest(t, s.CreateModelHandler, api.CreateRequest{
 			Name:      n,
-			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
+			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 		})
 	}

View File

@@ -261,7 +261,7 @@ func TestCase(t *testing.T) {
 		t.Run(tt, func(t *testing.T) {
 			w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 				Name:      tt,
-				Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
+				Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 				Stream:    &stream,
 			})
@@ -277,7 +277,7 @@ func TestCase(t *testing.T) {
t.Run("create", func(t *testing.T) {
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
Name: strings.ToUpper(tt),
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
})

View File

@@ -30,7 +30,8 @@ var templatesOnce = sync.OnceValues(func() ([]*Template, error) {
 			return nil, err
 		}

-		t.Bytes = bts
+		// normalize line endings
+		t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n"))
 	}

 	return templates, nil
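
Note: normalizing CRLF to LF when loading the embedded templates presumably keeps byte-level template comparisons (and any digests derived from them) stable regardless of the line endings a checkout uses. A standalone sketch of the normalization:

package main

import (
	"bytes"
	"fmt"
)

func main() {
	bts := []byte("{{ .System }}\r\n{{ .Prompt }}\r\n")
	// normalize line endings, as in the change above
	fmt.Printf("%q\n", bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n")))
	// prints "{{ .System }}\n{{ .Prompt }}\n"
}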