Compare commits


24 Commits

Author SHA1 Message Date
likelovewant
ff50cfb582 Merge branch 'ollama:main' into main 2024-06-14 00:59:47 +08:00
Patrick Devine
c69bc19e46 move OLLAMA_HOST to envconfig (#5009) 2024-06-12 18:48:16 -04:00
Michael Yang
bba5d177aa Merge pull request #5004 from ollama/mxyng/fix-templates
fix: multiple templates when creating from model
2024-06-12 14:39:29 -07:00
Michael Yang
c16f8af911 fix: multiple templates when creating from model
multiple templates may appear in a model if a model is created from
another model that 1) has an autodetected template and 2) defines a
custom template
2024-06-12 13:35:49 -07:00
likelovewant
edaec3183a Merge branch 'ollama:main' into main 2024-06-12 15:35:25 +08:00
Michael Yang
217f60c3d9 Merge pull request #4987 from ollama/mxyng/revert-byte-order
Revert "Merge pull request #4938 from ollama/mxyng/fix-byte-order"
2024-06-11 16:04:20 -07:00
Michael Yang
7bdcd1da94 Revert "Merge pull request #4938 from ollama/mxyng/fix-byte-order"
This reverts commit f5f245cc15, reversing
changes made to 94d37fdcae.

this change broke gguf v2 which is incorrectly detected as big endian
2024-06-11 15:56:17 -07:00
Jeffrey Morgan
ead259d877 llm: fix seed value not being applied to requests (#4986) 2024-06-11 14:24:41 -07:00
James Montgomery
2ff45d571d Add Ollama-hpp to Community Libraries in README. (#4983) 2024-06-11 11:15:05 -07:00
Michael Yang
0f3cf1d42e Merge pull request #4715 from ollama/mxyng/utf16-parser
proper utf16 support
2024-06-10 11:41:29 -07:00
Michael Yang
5bc029c529 Merge pull request #4921 from ollama/mxyng/import-md
update import.md
2024-06-10 11:41:09 -07:00
Michael Yang
e9a9c6a8e8 Merge pull request #4965 from ollama/mxyng/skip-layer-remove
fix: skip removing layers that no longer exist
2024-06-10 11:40:03 -07:00
Michael Yang
515f497e6d fix: skip removing layers that no longer exist 2024-06-10 11:32:19 -07:00
Michael Yang
b27268aaef add test 2024-06-10 11:32:15 -07:00
Michael Yang
f5f245cc15 Merge pull request #4938 from ollama/mxyng/fix-byte-order
fix parsing big endian gguf
2024-06-10 09:38:12 -07:00
Jim Scardelis
94d37fdcae fix: examples/langchain-python-rag-privategpt/requirements.txt (#3382) 2024-06-09 10:58:09 -07:00
Craig Hughes
b84aea1685 Critical fix from llama.cpp JSON grammar to forbid un-escaped escape characters inside strings, which breaks parsing. (#3782) 2024-06-09 10:57:09 -07:00
Napuh
896495de7b Add instructions to easily install specific versions on faq.md (#4084)
* Added instructions to easily install specific versions on faq.md

* Small typo

* Moved instructions on how to install specific version to linux.md

* Update docs/linux.md

* Update docs/linux.md

---------

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2024-06-09 10:49:03 -07:00
dcasota
5528dd9d11 Error handling load_single_document() in ingest.py (#4852)
load_single_document() handles
- corrupt files
- empty (zero byte) files
- unsupported file extensions
2024-06-09 10:41:07 -07:00
Jeffrey Morgan
943172cbf4 Update api.md 2024-06-08 23:04:32 -07:00
likelovewant
1b5848cbf2 remove gfx906 has conflicts with gfx906:xnack- 2024-06-09 11:46:22 +08:00
Michael Yang
620d5c569e fix parsing big endian gguf 2024-06-08 12:35:26 -07:00
Michael Yang
b9ce7bf75e update import.md 2024-06-07 16:45:15 -07:00
Michael Yang
66ab48772f proper utf16 support 2024-06-05 13:11:50 -07:00
21 changed files with 361 additions and 334 deletions

View File: README.md

@@ -350,6 +350,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
 - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
+- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
 - [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)

View File: api/client.go

@@ -23,11 +23,9 @@ import (
 	"net"
 	"net/http"
 	"net/url"
-	"os"
 	"runtime"
-	"strconv"
-	"strings"
 
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/version"
 )
@@ -65,10 +63,7 @@ func checkError(resp *http.Response, body []byte) error {
 // If the variable is not specified, a default ollama host and port will be
 // used.
 func ClientFromEnvironment() (*Client, error) {
-	ollamaHost, err := GetOllamaHost()
-	if err != nil {
-		return nil, err
-	}
+	ollamaHost := envconfig.Host
 
 	return &Client{
 		base: &url.URL{
@@ -79,52 +74,6 @@ func ClientFromEnvironment() (*Client, error) {
 	}, nil
 }
 
-type OllamaHost struct {
-	Scheme string
-	Host   string
-	Port   string
-}
-
-func GetOllamaHost() (OllamaHost, error) {
-	defaultPort := "11434"
-
-	hostVar := os.Getenv("OLLAMA_HOST")
-	hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
-
-	scheme, hostport, ok := strings.Cut(hostVar, "://")
-	switch {
-	case !ok:
-		scheme, hostport = "http", hostVar
-	case scheme == "http":
-		defaultPort = "80"
-	case scheme == "https":
-		defaultPort = "443"
-	}
-
-	// trim trailing slashes
-	hostport = strings.TrimRight(hostport, "/")
-
-	host, port, err := net.SplitHostPort(hostport)
-	if err != nil {
-		host, port = "127.0.0.1", defaultPort
-		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
-			host = ip.String()
-		} else if hostport != "" {
-			host = hostport
-		}
-	}
-
-	if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
-		return OllamaHost{}, ErrInvalidHostPort
-	}
-
-	return OllamaHost{
-		Scheme: scheme,
-		Host:   host,
-		Port:   port,
-	}, nil
-}
-
 func NewClient(base *url.URL, http *http.Client) *Client {
 	return &Client{
 		base: base,
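With `GetOllamaHost` gone from the `api` package, callers rely on `envconfig` to parse `OLLAMA_HOST` once at startup. A minimal sketch of the new call path, assuming `envconfig.LoadConfig` has been invoked first (the names used are only those visible in this diff):

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/envconfig"
)

func main() {
	// Parse OLLAMA_HOST (scheme, host, port) once; falls back to
	// http://127.0.0.1:11434 when the variable is unset or invalid.
	envconfig.LoadConfig()

	// The client no longer parses the environment itself; it just
	// reads the already-parsed envconfig.Host.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}
	_ = client

	fmt.Printf("%s://%s:%s\n", envconfig.Host.Scheme, envconfig.Host.Host, envconfig.Host.Port)
}
```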

View File: api/client_test.go

@@ -1,11 +1,9 @@
 package api
 
 import (
-	"fmt"
-	"net"
 	"testing"
 
-	"github.com/stretchr/testify/assert"
+	"github.com/ollama/ollama/envconfig"
 )
 
 func TestClientFromEnvironment(t *testing.T) {
@@ -35,6 +33,7 @@ func TestClientFromEnvironment(t *testing.T) {
 	for k, v := range testCases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", v.value)
+			envconfig.LoadConfig()
 
 			client, err := ClientFromEnvironment()
 			if err != v.err {
@@ -46,40 +45,4 @@ func TestClientFromEnvironment(t *testing.T) {
 			}
 		})
 	}
-
-	hostTestCases := map[string]*testCase{
-		"empty":               {value: "", expect: "127.0.0.1:11434"},
-		"only address":        {value: "1.2.3.4", expect: "1.2.3.4:11434"},
-		"only port":           {value: ":1234", expect: ":1234"},
-		"address and port":    {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
-		"hostname":            {value: "example.com", expect: "example.com:11434"},
-		"hostname and port":   {value: "example.com:1234", expect: "example.com:1234"},
-		"zero port":           {value: ":0", expect: ":0"},
-		"too large port":      {value: ":66000", err: ErrInvalidHostPort},
-		"too small port":      {value: ":-1", err: ErrInvalidHostPort},
-		"ipv6 localhost":      {value: "[::1]", expect: "[::1]:11434"},
-		"ipv6 world open":     {value: "[::]", expect: "[::]:11434"},
-		"ipv6 no brackets":    {value: "::1", expect: "[::1]:11434"},
-		"ipv6 + port":         {value: "[::1]:1337", expect: "[::1]:1337"},
-		"extra space":         {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
-		"extra quotes":        {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
-		"extra space+quotes":  {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
-		"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
-	}
-
-	for k, v := range hostTestCases {
-		t.Run(k, func(t *testing.T) {
-			t.Setenv("OLLAMA_HOST", v.value)
-
-			oh, err := GetOllamaHost()
-			if err != v.err {
-				t.Fatalf("expected %s, got %s", v.err, err)
-			}
-
-			if err == nil {
-				host := net.JoinHostPort(oh.Host, oh.Port)
-				assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
-			}
-		})
-	}
 }

View File: api/types.go

@@ -2,7 +2,6 @@ package api
 
 import (
 	"encoding/json"
-	"errors"
 	"fmt"
 	"log/slog"
 	"math"
@@ -377,8 +376,6 @@ func (m *Metrics) Summary() {
 	}
 }
 
-var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
-
 func (opts *Options) FromMap(m map[string]interface{}) error {
 	valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
 	typeOpts := reflect.TypeOf(opts).Elem()   // types of the fields in the options struct

View File: cmd/cmd.go

@@ -960,17 +960,11 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 }
 
 func RunServer(cmd *cobra.Command, _ []string) error {
-	// retrieve the OLLAMA_HOST environment variable
-	ollamaHost, err := api.GetOllamaHost()
-	if err != nil {
-		return err
-	}
-
 	if err := initializeKeypair(); err != nil {
 		return err
 	}
 
-	ln, err := net.Listen("tcp", net.JoinHostPort(ollamaHost.Host, ollamaHost.Port))
+	ln, err := net.Listen("tcp", net.JoinHostPort(envconfig.Host.Host, envconfig.Host.Port))
 	if err != nil {
 		return err
 	}

View File: docs/api.md

@@ -250,7 +250,7 @@ curl http://localhost:11434/api/generate -d '{
 
 #### Request (Reproducible outputs)
 
-For reproducible outputs, set `temperature` to 0 and `seed` to a number:
+For reproducible outputs, set `seed` to a number:
 
 ##### Request
 
@@ -259,8 +259,7 @@ curl http://localhost:11434/api/generate -d '{
   "model": "mistral",
   "prompt": "Why is the sky blue?",
   "options": {
-    "seed": 123,
-    "temperature": 0
+    "seed": 123
   }
 }'
 ```
@@ -1044,11 +1043,10 @@ GET /api/ps
 
 List models that are currently loaded into memory.
 
+\* If a model is loaded completely into system memory, `size_vram` is omitted from the response.
+
 #### Examples
 
 ### Request
 
 ```shell
 curl http://localhost:11434/api/ps
 ```
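The documentation change pairs with the `llm: fix seed value not being applied to requests` commit below: once the sampling seed is honored per request, `temperature: 0` is no longer required for reproducibility. For readers using the Go client rather than curl, a hedged sketch of the same request; it assumes the `api` package's `GenerateRequest`/`Generate` streaming helper behaves as in this tree:

```go
package main

import (
	"context"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}

	req := &api.GenerateRequest{
		Model:  "mistral",
		Prompt: "Why is the sky blue?",
		Options: map[string]interface{}{
			"seed": 123, // fixed seed -> same output on repeated calls
		},
	}

	// Generate streams the response through the callback.
	err = client.Generate(context.Background(), req, func(resp api.GenerateResponse) error {
		fmt.Print(resp.Response)
		return nil
	})
	if err != nil {
		panic(err)
	}
}
```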

View File: docs/import.md

@@ -1,170 +1,99 @@
-# Import a model
-
-This guide walks through importing a GGUF, PyTorch or Safetensors model.
-
-## Importing (GGUF)
-
-### Step 1: Write a `Modelfile`
-
-Start by creating a `Modelfile`. This file is the blueprint for your model, specifying weights, parameters, prompt templates and more.
-
-```
-FROM ./mistral-7b-v0.1.Q4_0.gguf
-```
-
-(Optional) many chat models require a prompt template in order to answer correctly. A default prompt template can be specified with the `TEMPLATE` instruction in the `Modelfile`:
-
-```
-FROM ./mistral-7b-v0.1.Q4_0.gguf
-TEMPLATE "[INST] {{ .Prompt }} [/INST]"
-```
-
-### Step 2: Create the Ollama model
-
-Finally, create a model from your `Modelfile`:
-
-```
-ollama create example -f Modelfile
-```
-
-### Step 3: Run your model
-
-Next, test the model with `ollama run`:
-
-```
-ollama run example "What is your favourite condiment?"
-```
-
-## Importing (PyTorch & Safetensors)
-
-> Importing from PyTorch and Safetensors is a longer process than importing from GGUF. Improvements that make it easier are a work in progress.
-
-### Setup
-
-First, clone the `ollama/ollama` repo:
-
-```
-git clone git@github.com:ollama/ollama.git ollama
-cd ollama
-```
-
-and then fetch its `llama.cpp` submodule:
-
-```shell
-git submodule init
-git submodule update llm/llama.cpp
-```
-
-Next, install the Python dependencies:
-
-```
-python3 -m venv llm/llama.cpp/.venv
-source llm/llama.cpp/.venv/bin/activate
-pip install -r llm/llama.cpp/requirements.txt
-```
-
-Then build the `quantize` tool:
-
-```
-make -C llm/llama.cpp quantize
-```
-
-### Clone the HuggingFace repository (optional)
-
-If the model is currently hosted in a HuggingFace repository, first clone that repository to download the raw model.
-
-Install [Git LFS](https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage), verify it's installed, and then clone the model's repository:
-
-```
-git lfs install
-git clone https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 model
-```
-
-### Convert the model
-
-> Note: some model architectures require using specific convert scripts. For example, Qwen models require running `convert-hf-to-gguf.py` instead of `convert.py`
-
-```
-python llm/llama.cpp/convert.py ./model --outtype f16 --outfile converted.bin
-```
-
-### Quantize the model
-
-```
-llm/llama.cpp/quantize converted.bin quantized.bin q4_0
-```
-
-### Step 3: Write a `Modelfile`
-
-Next, create a `Modelfile` for your model:
-
-```
-FROM quantized.bin
-TEMPLATE "[INST] {{ .Prompt }} [/INST]"
-```
-
-### Step 4: Create the Ollama model
-
-Finally, create a model from your `Modelfile`:
-
-```
-ollama create example -f Modelfile
-```
-
-### Step 5: Run your model
-
-Next, test the model with `ollama run`:
-
-```
-ollama run example "What is your favourite condiment?"
-```
-
-## Publishing your model (optional early alpha)
-
-Publishing models is in early alpha. If you'd like to publish your model to share with others, follow these steps:
-
-1. Create [an account](https://ollama.com/signup)
-2. Copy your Ollama public key:
-   - macOS: `cat ~/.ollama/id_ed25519.pub | pbcopy`
-   - Windows: `type %USERPROFILE%\.ollama\id_ed25519.pub`
-   - Linux: `cat /usr/share/ollama/.ollama/id_ed25519.pub`
-3. Add your public key to your [Ollama account](https://ollama.com/settings/keys)
-
-Next, copy your model to your username's namespace:
-
-```
-ollama cp example <your username>/example
-```
-
-> Note: model names may only contain lowercase letters, digits, and the characters `.`, `-`, and `_`.
-
-Then push the model:
-
-```
-ollama push <your username>/example
-```
-
-After publishing, your model will be available at `https://ollama.com/<your username>/example`.
-
-## Quantization reference
-
-The quantization options are as follow (from highest highest to lowest levels of quantization). Note: some architectures such as Falcon do not support K quants.
-
-- `q2_K`
-- `q3_K`
-- `q3_K_S`
-- `q3_K_M`
-- `q3_K_L`
-- `q4_0` (recommended)
-- `q4_1`
-- `q4_K`
-- `q4_K_S`
-- `q4_K_M`
-- `q5_0`
-- `q5_1`
-- `q5_K`
-- `q5_K_S`
-- `q5_K_M`
-- `q6_K`
-- `q8_0`
-- `f16`
+# Import
+
+GGUF models and select Safetensors models can be imported directly into Ollama.
+
+## Import GGUF
+
+A binary GGUF file can be imported directly into Ollama through a Modelfile.
+
+```dockerfile
+FROM /path/to/file.gguf
+```
+
+## Import Safetensors
+
+If the model being imported is one of these architectures, it can be imported directly into Ollama through a Modelfile:
+
+- LlamaForCausalLM
+- MistralForCausalLM
+- GemmaForCausalLM
+
+```dockerfile
+FROM /path/to/safetensors/directory
+```
+
+For architectures not directly convertable by Ollama, see llama.cpp's [guide](https://github.com/ggerganov/llama.cpp/blob/master/README.md#prepare-and-quantize) on conversion. After conversion, see [Import GGUF](#import-gguf).
+
+## Automatic Quantization
+
+> [!NOTE]
+> Automatic quantization requires v0.1.35 or higher.
+
+Ollama is capable of quantizing FP16 or FP32 models to any of the supported quantizations with the `-q/--quantize` flag in `ollama create`.
+
+```dockerfile
+FROM /path/to/my/gemma/f16/model
+```
+
+```shell
+$ ollama create -q Q4_K_M mymodel
+transferring model data
+quantizing F16 model to Q4_K_M
+creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd
+creating new layer sha256:0853f0ad24e5865173bbf9ffcc7b0f5d56b66fd690ab1009867e45e7d2c4db0f
+writing manifest
+success
+```
+
+### Supported Quantizations
+
+<details>
+<summary>Legacy Quantization</summary>
+
+- `Q4_0`
+- `Q4_1`
+- `Q5_0`
+- `Q5_1`
+- `Q8_0`
+
+</details>
+
+<details>
+<summary>K-means Quantization</summary>
+
+- `Q3_K_S`
+- `Q3_K_M`
+- `Q3_K_L`
+- `Q4_K_S`
+- `Q4_K_M`
+- `Q5_K_S`
+- `Q5_K_M`
+- `Q6_K`
+
+</details>
+
+> [!NOTE]
+> Activation-aware Weight Quantization (i.e. IQ) are not currently supported for automatic quantization however you can still import the quantized model into Ollama, see [Import GGUF](#import-gguf).
+
+## Template Detection
+
+> [!NOTE]
+> Template detection requires v0.1.42 or higher.
+
+Ollama uses model metadata, specifically `tokenizer.chat_template`, to automatically create a template appropriate for the model you're importing.
+
+```dockerfile
+FROM /path/to/my/gemma/model
+```
+
+```shell
+$ ollama create mymodel
+transferring model data
+using autodetected template gemma-instruct
+creating new layer sha256:baa2a0edc27d19cc6b7537578a9a7ba1a4e3214dc185ed5ae43692b319af7b84
+creating new layer sha256:ba66c3309914dbef07e5149a648fd1877f030d337a4f240d444ea335008943cb
+writing manifest
+success
+```
+
+Defining a template in the Modelfile will disable this feature which may be useful if you want to use a different template than the autodetected one.
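As a side note on the Safetensors architecture list in the rewritten guide: the architecture of a Hugging Face checkout is recorded in its `config.json`. Below is a hypothetical helper (not part of Ollama; the function name and logic are illustrative only) that checks whether a directory holds one of the directly importable architectures:

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
)

// importableDirectly reports whether the Safetensors checkout at dir declares
// one of the architectures Ollama can import without conversion.
func importableDirectly(dir string) (bool, error) {
	f, err := os.Open(filepath.Join(dir, "config.json"))
	if err != nil {
		return false, err
	}
	defer f.Close()

	var cfg struct {
		Architectures []string `json:"architectures"`
	}
	if err := json.NewDecoder(f).Decode(&cfg); err != nil {
		return false, err
	}

	supported := map[string]bool{
		"LlamaForCausalLM":   true,
		"MistralForCausalLM": true,
		"GemmaForCausalLM":   true,
	}
	for _, a := range cfg.Architectures {
		if supported[a] {
			return true, nil
		}
	}
	return false, nil
}

func main() {
	ok, err := importableDirectly("/path/to/safetensors/directory")
	fmt.Println(ok, err)
}
```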

View File: docs/linux.md

@@ -100,6 +100,16 @@ sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
 sudo chmod +x /usr/bin/ollama
 ```
 
+## Installing specific versions
+
+Use `OLLAMA_VERSION` environment variable with the install script to install a specific version of Ollama, including pre-releases. You can find the version numbers in the [releases page](https://github.com/ollama/ollama/releases).
+
+For example:
+
+```
+curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.1.32 sh
+```
+
 ## Viewing logs
 
 To view logs of Ollama running as a startup service, run:

View File: envconfig/config.go

@@ -1,6 +1,7 @@
 package envconfig
 
 import (
+	"errors"
 	"fmt"
 	"log/slog"
 	"net"
@@ -11,6 +12,18 @@ import (
 	"strings"
 )
 
+type OllamaHost struct {
+	Scheme string
+	Host   string
+	Port   string
+}
+
+func (o OllamaHost) String() string {
+	return fmt.Sprintf("%s://%s:%s", o.Scheme, o.Host, o.Port)
+}
+
+var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
+
 var (
 	// Set via OLLAMA_ORIGINS in the environment
 	AllowOrigins []string
@@ -34,6 +47,8 @@ var (
 	NoPrune bool
 	// Set via OLLAMA_NUM_PARALLEL in the environment
 	NumParallel int
+	// Set via OLLAMA_HOST in the environment
+	Host *OllamaHost
 	// Set via OLLAMA_RUNNERS_DIR in the environment
 	RunnersDir string
 	// Set via OLLAMA_TMPDIR in the environment
@@ -50,7 +65,7 @@ func AsMap() map[string]EnvVar {
 	return map[string]EnvVar{
 		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
 		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
-		"OLLAMA_HOST":              {"OLLAMA_HOST", "", "IP Address for the ollama server (default 127.0.0.1:11434)"},
+		"OLLAMA_HOST":              {"OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)"},
 		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
 		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
 		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
@@ -216,4 +231,54 @@ func LoadConfig() {
 	}
 
 	KeepAlive = clean("OLLAMA_KEEP_ALIVE")
+
+	var err error
+	Host, err = getOllamaHost()
+	if err != nil {
+		slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
+	}
+}
+
+func getOllamaHost() (*OllamaHost, error) {
+	defaultPort := "11434"
+
+	hostVar := os.Getenv("OLLAMA_HOST")
+	hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
+
+	scheme, hostport, ok := strings.Cut(hostVar, "://")
+	switch {
+	case !ok:
+		scheme, hostport = "http", hostVar
+	case scheme == "http":
+		defaultPort = "80"
+	case scheme == "https":
+		defaultPort = "443"
+	}
+
+	// trim trailing slashes
+	hostport = strings.TrimRight(hostport, "/")
+
+	host, port, err := net.SplitHostPort(hostport)
+	if err != nil {
+		host, port = "127.0.0.1", defaultPort
+		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
+			host = ip.String()
+		} else if hostport != "" {
+			host = hostport
+		}
+	}
+
+	if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
+		return &OllamaHost{
+			Scheme: scheme,
+			Host:   host,
+			Port:   defaultPort,
+		}, ErrInvalidHostPort
+	}
+
+	return &OllamaHost{
+		Scheme: scheme,
+		Host:   host,
+		Port:   port,
+	}, nil
 }

View File: envconfig/config_test.go

@@ -1,8 +1,11 @@
 package envconfig
 
 import (
+	"fmt"
+	"net"
 	"testing"
 
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 
@@ -21,3 +24,48 @@ func TestConfig(t *testing.T) {
 	LoadConfig()
 	require.True(t, FlashAttention)
 }
+
+func TestClientFromEnvironment(t *testing.T) {
+	type testCase struct {
+		value  string
+		expect string
+		err    error
+	}
+
+	hostTestCases := map[string]*testCase{
+		"empty":               {value: "", expect: "127.0.0.1:11434"},
+		"only address":        {value: "1.2.3.4", expect: "1.2.3.4:11434"},
+		"only port":           {value: ":1234", expect: ":1234"},
+		"address and port":    {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
+		"hostname":            {value: "example.com", expect: "example.com:11434"},
+		"hostname and port":   {value: "example.com:1234", expect: "example.com:1234"},
+		"zero port":           {value: ":0", expect: ":0"},
+		"too large port":      {value: ":66000", err: ErrInvalidHostPort},
+		"too small port":      {value: ":-1", err: ErrInvalidHostPort},
+		"ipv6 localhost":      {value: "[::1]", expect: "[::1]:11434"},
+		"ipv6 world open":     {value: "[::]", expect: "[::]:11434"},
+		"ipv6 no brackets":    {value: "::1", expect: "[::1]:11434"},
+		"ipv6 + port":         {value: "[::1]:1337", expect: "[::1]:1337"},
+		"extra space":         {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
+		"extra quotes":        {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
+		"extra space+quotes":  {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
+		"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
+	}
+
+	for k, v := range hostTestCases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_HOST", v.value)
+			LoadConfig()
+
+			oh, err := getOllamaHost()
+			if err != v.err {
+				t.Fatalf("expected %s, got %s", v.err, err)
+			}
+
+			if err == nil {
+				host := net.JoinHostPort(oh.Host, oh.Port)
+				assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
+			}
+		})
+	}
+}

View File: examples/langchain-python-rag-privategpt/ingest.py

@@ -77,13 +77,21 @@ LOADER_MAPPING = {
 
 def load_single_document(file_path: str) -> List[Document]:
-    ext = "." + file_path.rsplit(".", 1)[-1]
-    if ext in LOADER_MAPPING:
-        loader_class, loader_args = LOADER_MAPPING[ext]
-        loader = loader_class(file_path, **loader_args)
-        return loader.load()
-
-    raise ValueError(f"Unsupported file extension '{ext}'")
+    if os.path.getsize(file_path) != 0:
+        filename, ext = os.path.splitext(file_path)
+        if ext in LOADER_MAPPING:
+            loader_class, loader_args = LOADER_MAPPING[ext]
+            try:
+                loader = loader_class(file_path, **loader_args)
+                if loader:
+                    return loader.load()
+            except:
+                print(f"Corrupted file {file_path}. Ignoring it.")
+        else:
+            print(f"Unsupported file {file_path}. Ignoring it.")
+    else:
+        print(f"Empty file {file_path}. Ignoring it.")
 
 def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]:
     """
@@ -100,6 +108,7 @@ def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]:
     results = []
     with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar:
         for i, docs in enumerate(pool.imap_unordered(load_single_document, filtered_files)):
-            results.extend(docs)
+            if docs:
+                results.extend(docs)
             pbar.update()

View File: examples/langchain-python-rag-privategpt/requirements.txt

@@ -12,3 +12,4 @@ pandoc==2.3
 pypandoc==1.11
 tqdm==4.66.1
 sentence_transformers==2.2.2
+numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability

View File: llm/ext_server/server.cpp

@@ -359,7 +359,6 @@ struct llama_server_context
 
     // slots / clients
     std::vector<server_slot> slots;
-    json default_generation_settings_for_props;
 
     llama_server_queue    queue_tasks;
     llama_server_response queue_results;
@@ -483,9 +482,6 @@ struct llama_server_context
             slots.push_back(slot);
         }
 
-        default_generation_settings_for_props = get_formated_generation(slots.front());
-        default_generation_settings_for_props["seed"] = -1;
-
         batch = llama_batch_init(n_ctx, 0, params.n_parallel);
     }
@@ -584,7 +580,7 @@ struct llama_server_context
             slot->sparams.mirostat_eta      = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
             slot->sparams.penalize_nl       = json_value(data, "penalize_nl", default_sparams.penalize_nl);
             slot->params.n_keep             = json_value(data, "n_keep", slot->params.n_keep);
-            slot->params.seed               = json_value(data, "seed", default_params.seed);
+            slot->sparams.seed              = json_value(data, "seed", default_params.seed);
             slot->sparams.grammar           = json_value(data, "grammar", default_sparams.grammar);
             slot->sparams.n_probs           = json_value(data, "n_probs", default_sparams.n_probs);
            slot->sparams.min_keep          = json_value(data, "min_keep", default_sparams.min_keep);
@@ -811,7 +807,6 @@ struct llama_server_context
             llama_sampling_free(slot->ctx_sampling);
         }
         slot->ctx_sampling = llama_sampling_init(slot->sparams);
-        llama_set_rng_seed(ctx, slot->params.seed);
         slot->command = LOAD_PROMPT;
         all_slots_are_idle = false;

View File

@@ -13,7 +13,6 @@ function amdGPUs {
         "gfx902"
         "gfx904"
        "gfx90c"
-        "gfx906"
         "gfx906:xnack-"
         "gfx908:xnack-"
         "gfx90a:xnack+"

View File

@@ -606,7 +606,7 @@ array  ::=
 
 string ::=
   "\"" (
-    [^"\\] |
+    [^"\\\x7F\x00-\x1F] |
     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
   )* "\"" ws

View File: parser/parser.go

@@ -3,12 +3,15 @@ package parser
 import (
 	"bufio"
 	"bytes"
+	"encoding/binary"
 	"errors"
 	"fmt"
 	"io"
+	"log/slog"
 	"strconv"
 	"strings"
-	"unicode"
+	"unicode/utf16"
+	"unicode/utf8"
 )
 
 type File struct {
@@ -69,33 +72,31 @@ func ParseFile(r io.Reader) (*File, error) {
 	var b bytes.Buffer
 	var role string
-	var lineCount int
-	var linePos int
-	var utf16 bool
 
 	var f File
 
 	br := bufio.NewReader(r)
-	for {
-		r, _, err := br.ReadRune()
-		if errors.Is(err, io.EOF) {
-			break
-		} else if err != nil {
+
+	var sc scannerDecoder = utf8ScannerDecoder{}
+	if bom, err := br.Peek(2); err != nil {
+		slog.Warn("error reading byte-order mark", "error", err)
+	} else if bytes.Equal(bom, []byte{0xFE, 0xFF}) {
+		sc = utf16ScannerDecoder{binary.LittleEndian}
+		//nolint:errcheck
+		br.Discard(2)
+	} else if bytes.Equal(bom, []byte{0xFF, 0xFE}) {
+		sc = utf16ScannerDecoder{binary.BigEndian}
+		//nolint:errcheck
+		br.Discard(2)
+	}
+
+	scanner := bufio.NewScanner(br)
+	scanner.Split(sc.ScanBytes)
+	for scanner.Scan() {
+		r, err := sc.DecodeRune(scanner.Bytes())
+		if err != nil {
 			return nil, err
 		}
 
-		// the utf16 byte order mark will be read as "unreadable" by ReadRune()
-		if isUnreadable(r) && lineCount == 0 && linePos == 0 {
-			utf16 = true
-			continue
-		}
-
-		// skip the second byte if we're reading utf16
-		if utf16 && r == 0 {
-			continue
-		}
-
 		next, r, err := parseRuneForState(r, curr)
 		if errors.Is(err, io.ErrUnexpectedEOF) {
 			return nil, fmt.Errorf("%w: %s", err, b.String())
@@ -103,13 +104,6 @@ func ParseFile(r io.Reader) (*File, error) {
 			return nil, err
 		}
 
-		if isNewline(r) {
-			lineCount++
-			linePos = 0
-		} else {
-			linePos++
-		}
-
 		// process the state transition, some transitions need to be intercepted and redirected
 		if next != curr {
 			switch curr {
@@ -309,10 +303,6 @@ func isNewline(r rune) bool {
 	return r == '\r' || r == '\n'
 }
 
-func isUnreadable(r rune) bool {
-	return r == unicode.ReplacementChar
-}
-
 func isValidMessageRole(role string) bool {
 	return role == "system" || role == "user" || role == "assistant"
 }
@@ -325,3 +315,39 @@ func isValidCommand(cmd string) bool {
 		return false
 	}
 }
+
+type scannerDecoder interface {
+	ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error)
+	DecodeRune([]byte) (rune, error)
+}
+
+type utf8ScannerDecoder struct{}
+
+func (utf8ScannerDecoder) ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) {
+	return scanBytesN(data, 1, atEOF)
+}
+
+func (utf8ScannerDecoder) DecodeRune(data []byte) (rune, error) {
+	r, _ := utf8.DecodeRune(data)
+	return r, nil
+}
+
+type utf16ScannerDecoder struct {
+	binary.ByteOrder
+}
+
+func (utf16ScannerDecoder) ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) {
+	return scanBytesN(data, 2, atEOF)
+}
+
+func (e utf16ScannerDecoder) DecodeRune(data []byte) (rune, error) {
+	return utf16.Decode([]uint16{e.ByteOrder.Uint16(data)})[0], nil
+}
+
+func scanBytesN(data []byte, n int, atEOF bool) (int, []byte, error) {
+	if atEOF && len(data) == 0 {
+		return 0, nil, nil
+	}
+	return n, data[:n], nil
+}
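The new parser replaces rune-at-a-time reading with BOM sniffing: peek two bytes, pick a decoder, then scan fixed-size chunks. A self-contained sketch of that approach for UTF-16 input (names and structure here are illustrative, not the parser's actual API; note that per the Unicode standard, FF FE marks little-endian and FE FF big-endian, and surrogate pairs are ignored for brevity):

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"unicode/utf16"
)

func main() {
	// "Hi" encoded as little-endian UTF-16 with a FF FE byte-order mark.
	input := []byte{0xFF, 0xFE, 'H', 0x00, 'i', 0x00}

	br := bufio.NewReader(bytes.NewReader(input))

	var order binary.ByteOrder = binary.LittleEndian
	if bom, err := br.Peek(2); err == nil {
		switch {
		case bytes.Equal(bom, []byte{0xFE, 0xFF}):
			order = binary.BigEndian
			br.Discard(2) // consume the BOM
		case bytes.Equal(bom, []byte{0xFF, 0xFE}):
			order = binary.LittleEndian
			br.Discard(2) // consume the BOM
		}
	}

	// Decode two-byte units until the input is exhausted.
	var runes []rune
	buf := make([]byte, 2)
	for {
		if _, err := io.ReadFull(br, buf); err != nil {
			break
		}
		runes = append(runes, utf16.Decode([]uint16{order.Uint16(buf)})...)
	}

	fmt.Println(string(runes)) // prints "Hi"
}
```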

View File: server/images.go

@@ -28,7 +28,6 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/parser"
-	"github.com/ollama/ollama/templates"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
@@ -333,7 +332,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantization
 		switch c.Name {
 		case "model", "adapter":
-			var baseLayers []*layerWithGGML
+			var baseLayers []*layerGGML
 			if name := model.ParseName(c.Args); name.IsValid() {
 				baseLayers, err = parseFromModel(ctx, name, fn)
 				if err != nil {
@@ -435,20 +434,6 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantization
 				config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
 				config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType().String())
 				config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
-
-				if s := baseLayer.GGML.KV().ChatTemplate(); s != "" {
-					if t, err := templates.NamedTemplate(s); err != nil {
-						slog.Debug("template detection", "error", err)
-					} else {
-						layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
-						if err != nil {
-							return err
-						}
-
-						layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
-						layers = append(layers, layer)
-					}
-				}
 			}
 
 			layers = append(layers, baseLayer.Layer)

View File: server/manifest.go

@@ -3,6 +3,7 @@ package server
 import (
 	"crypto/sha256"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@@ -43,7 +44,9 @@ func (m *Manifest) Remove() error {
 
 func (m *Manifest) RemoveLayers() error {
 	for _, layer := range append(m.Layers, m.Config) {
-		if err := layer.Remove(); err != nil {
+		if err := layer.Remove(); errors.Is(err, os.ErrNotExist) {
+			slog.Debug("layer does not exist", "digest", layer.Digest)
+		} else if err != nil {
 			return err
 		}
 	}
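The pattern in isolation: removing a layer that is already gone is logged and tolerated rather than aborting the loop. A hedged, self-contained sketch of the same idiom (`removeIfExists` is hypothetical, not Ollama code); it is also what lets `TestDeleteDuplicateLayers` below pass, since the second removal of a duplicated layer hits `os.ErrNotExist`:

```go
package main

import (
	"errors"
	"fmt"
	"os"
)

// removeIfExists deletes path, treating "already gone" as success.
func removeIfExists(path string) error {
	if err := os.Remove(path); errors.Is(err, os.ErrNotExist) {
		fmt.Printf("already removed: %s\n", path)
		return nil
	} else if err != nil {
		return err
	}
	return nil
}

func main() {
	// Removing a nonexistent file does not return an error here.
	if err := removeIfExists("/tmp/does-not-exist"); err != nil {
		panic(err)
	}
}
```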

View File: server/model.go

@@ -7,6 +7,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"log/slog"
 	"net/http"
 	"os"
 	"path/filepath"
@@ -14,17 +15,18 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/convert"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/templates"
 	"github.com/ollama/ollama/types/model"
 )
 
 var intermediateBlobs map[string]string = make(map[string]string)
 
-type layerWithGGML struct {
+type layerGGML struct {
 	*Layer
 	*llm.GGML
 }
 
-func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
+func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
 	m, err := ParseNamedManifest(name)
 	switch {
 	case errors.Is(err, os.ErrNotExist):
@@ -66,16 +68,16 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) {
 				return nil, err
 			}
 
-			layers = append(layers, &layerWithGGML{layer, ggml})
+			layers = append(layers, &layerGGML{layer, ggml})
 		default:
-			layers = append(layers, &layerWithGGML{layer, nil})
+			layers = append(layers, &layerGGML{layer, nil})
 		}
 	}
 
 	return layers, nil
 }
 
-func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
+func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
 	stat, err := file.Stat()
 	if err != nil {
 		return nil, err
@@ -179,13 +181,13 @@ func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) {
 		return nil, err
 	}
 
-	layers = append(layers, &layerWithGGML{layer, ggml})
+	layers = append(layers, &layerGGML{layer, ggml})
 
 	intermediateBlobs[digest] = layer.Digest
-	return layers, nil
+	return detectChatTemplate(layers)
 }
 
-func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
+func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
 	sr := io.NewSectionReader(file, 0, 512)
 	contentType, err := detectContentType(sr)
 	if err != nil {
@@ -227,10 +229,30 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) {
 			return nil, err
 		}
 
-		layers = append(layers, &layerWithGGML{layer, ggml})
+		layers = append(layers, &layerGGML{layer, ggml})
 		offset = n
 	}
 
+	return detectChatTemplate(layers)
+}
+
+func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
+	for _, layer := range layers {
+		if s := layer.GGML.KV().ChatTemplate(); s != "" {
+			if t, err := templates.NamedTemplate(s); err != nil {
+				slog.Debug("template detection", "error", err)
+			} else {
+				tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
+				if err != nil {
+					return nil, err
+				}
+
+				tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
+				layers = append(layers, &layerGGML{tmpl, nil})
+			}
+		}
+	}
+
 	return layers, nil
 }

View File: server/routes_create_test.go

@@ -535,7 +535,7 @@ func TestCreateDetectTemplate(t *testing.T) {
 	}
 
 	checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
-		filepath.Join(p, "blobs", "sha256-06cd2687a518d624073f125f1db1c5c727f77c75e84a138fe745186dbbbb4cd7"),
+		filepath.Join(p, "blobs", "sha256-2f8e594e6f34b1b4d36a246628eeb3365ce442303d656f1fcc69e821722acea0"),
 		filepath.Join(p, "blobs", "sha256-542b217f179c7825eeb5bca3c77d2b75ed05bafbd3451d9188891a60a85337c6"),
 		filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
 	})

View File: server/routes_delete_test.go

@@ -1,12 +1,15 @@
 package server
 
 import (
+	"bytes"
+	"encoding/json"
 	"fmt"
 	"net/http"
 	"path/filepath"
 	"testing"
 
 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/types/model"
 )
 
 func TestDelete(t *testing.T) {
@@ -69,3 +72,33 @@ func TestDelete(t *testing.T) {
 	checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{})
 	checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{})
 }
+
+func TestDeleteDuplicateLayers(t *testing.T) {
+	p := t.TempDir()
+	t.Setenv("OLLAMA_MODELS", p)
+	var s Server
+
+	n := model.ParseName("test")
+
+	var b bytes.Buffer
+	if err := json.NewEncoder(&b).Encode(&ConfigV2{}); err != nil {
+		t.Fatal(err)
+	}
+
+	config, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// create a manifest with duplicate layers
+	if err := WriteManifest(n, config, []*Layer{config}); err != nil {
+		t.Fatal(err)
+	}
+
+	w := createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"})
+	if w.Code != http.StatusOK {
+		t.Errorf("expected status code 200, actual %d", w.Code)
+	}
+
+	checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{})
+}