From 66ab48772f4f41f3f27fb93e15ef0cf756bda3d0 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 29 May 2024 21:37:07 -0700 Subject: [PATCH 01/13] proper utf16 support --- parser/parser.go | 92 +++++++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 33 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 4f44f6af..c9afc20b 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -3,12 +3,15 @@ package parser import ( "bufio" "bytes" + "encoding/binary" "errors" "fmt" "io" + "log/slog" "strconv" "strings" - "unicode" + "unicode/utf16" + "unicode/utf8" ) type File struct { @@ -69,33 +72,31 @@ func ParseFile(r io.Reader) (*File, error) { var b bytes.Buffer var role string - var lineCount int - var linePos int - - var utf16 bool - var f File br := bufio.NewReader(r) - for { - r, _, err := br.ReadRune() - if errors.Is(err, io.EOF) { - break - } else if err != nil { + + var sc scannerDecoder = utf8ScannerDecoder{} + if bom, err := br.Peek(2); err != nil { + slog.Warn("error reading byte-order mark", "error", err) + } else if bytes.Equal(bom, []byte{0xFE, 0xFF}) { + sc = utf16ScannerDecoder{binary.LittleEndian} + //nolint:errcheck + br.Discard(2) + } else if bytes.Equal(bom, []byte{0xFF, 0xFE}) { + sc = utf16ScannerDecoder{binary.BigEndian} + //nolint:errcheck + br.Discard(2) + } + + scanner := bufio.NewScanner(br) + scanner.Split(sc.ScanBytes) + for scanner.Scan() { + r, err := sc.DecodeRune(scanner.Bytes()) + if err != nil { return nil, err } - // the utf16 byte order mark will be read as "unreadable" by ReadRune() - if isUnreadable(r) && lineCount == 0 && linePos == 0 { - utf16 = true - continue - } - - // skip the second byte if we're reading utf16 - if utf16 && r == 0 { - continue - } - next, r, err := parseRuneForState(r, curr) if errors.Is(err, io.ErrUnexpectedEOF) { return nil, fmt.Errorf("%w: %s", err, b.String()) @@ -103,13 +104,6 @@ func ParseFile(r io.Reader) (*File, error) { return nil, err } - if isNewline(r) { - lineCount++ - linePos = 0 - } else { - linePos++ - } - // process the state transition, some transitions need to be intercepted and redirected if next != curr { switch curr { @@ -309,10 +303,6 @@ func isNewline(r rune) bool { return r == '\r' || r == '\n' } -func isUnreadable(r rune) bool { - return r == unicode.ReplacementChar -} - func isValidMessageRole(role string) bool { return role == "system" || role == "user" || role == "assistant" } @@ -325,3 +315,39 @@ func isValidCommand(cmd string) bool { return false } } + +type scannerDecoder interface { + ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) + DecodeRune([]byte) (rune, error) +} + +type utf8ScannerDecoder struct{} + +func (utf8ScannerDecoder) ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) { + return scanBytesN(data, 1, atEOF) +} + +func (utf8ScannerDecoder) DecodeRune(data []byte) (rune, error) { + r, _ := utf8.DecodeRune(data) + return r, nil +} + +type utf16ScannerDecoder struct { + binary.ByteOrder +} + +func (utf16ScannerDecoder) ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) { + return scanBytesN(data, 2, atEOF) +} + +func (e utf16ScannerDecoder) DecodeRune(data []byte) (rune, error) { + return utf16.Decode([]uint16{e.ByteOrder.Uint16(data)})[0], nil +} + +func scanBytesN(data []byte, n int, atEOF bool) (int, []byte, error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + + return n, data[:n], nil +} From b9ce7bf75effbe56a4d8f8b237363eb96223c7ae Mon Sep 17 00:00:00 2001 From: 
Michael Yang Date: Fri, 7 Jun 2024 16:45:15 -0700 Subject: [PATCH 02/13] update import.md --- docs/import.md | 227 +++++++++++++++++-------------------------------- 1 file changed, 78 insertions(+), 149 deletions(-) diff --git a/docs/import.md b/docs/import.md index 7041b74d..7abe39b2 100644 --- a/docs/import.md +++ b/docs/import.md @@ -1,170 +1,99 @@ -# Import a model +# Import -This guide walks through importing a GGUF, PyTorch or Safetensors model. +GGUF models and select Safetensors models can be imported directly into Ollama. -## Importing (GGUF) +## Import GGUF -### Step 1: Write a `Modelfile` +A binary GGUF file can be imported directly into Ollama through a Modelfile. -Start by creating a `Modelfile`. This file is the blueprint for your model, specifying weights, parameters, prompt templates and more. - -``` -FROM ./mistral-7b-v0.1.Q4_0.gguf +```dockerfile +FROM /path/to/file.gguf ``` -(Optional) many chat models require a prompt template in order to answer correctly. A default prompt template can be specified with the `TEMPLATE` instruction in the `Modelfile`: +## Import Safetensors -``` -FROM ./mistral-7b-v0.1.Q4_0.gguf -TEMPLATE "[INST] {{ .Prompt }} [/INST]" +If the model being imported is one of these architectures, it can be imported directly into Ollama through a Modelfile: + + - LlamaForCausalLM + - MistralForCausalLM + - GemmaForCausalLM + +```dockerfile +FROM /path/to/safetensors/directory ``` -### Step 2: Create the Ollama model +For architectures not directly convertable by Ollama, see llama.cpp's [guide](https://github.com/ggerganov/llama.cpp/blob/master/README.md#prepare-and-quantize) on conversion. After conversion, see [Import GGUF](#import-gguf). -Finally, create a model from your `Modelfile`: +## Automatic Quantization +> [!NOTE] +> Automatic quantization requires v0.1.35 or higher. + +Ollama is capable of quantizing FP16 or FP32 models to any of the supported quantizations with the `-q/--quantize` flag in `ollama create`. + +```dockerfile +FROM /path/to/my/gemma/f16/model ``` -ollama create example -f Modelfile -``` - -### Step 3: Run your model - -Next, test the model with `ollama run`: - -``` -ollama run example "What is your favourite condiment?" -``` - -## Importing (PyTorch & Safetensors) - -> Importing from PyTorch and Safetensors is a longer process than importing from GGUF. Improvements that make it easier are a work in progress. - -### Setup - -First, clone the `ollama/ollama` repo: - -``` -git clone git@github.com:ollama/ollama.git ollama -cd ollama -``` - -and then fetch its `llama.cpp` submodule: ```shell -git submodule init -git submodule update llm/llama.cpp +$ ollama create -q Q4_K_M mymodel +transferring model data +quantizing F16 model to Q4_K_M +creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd +creating new layer sha256:0853f0ad24e5865173bbf9ffcc7b0f5d56b66fd690ab1009867e45e7d2c4db0f +writing manifest +success ``` -Next, install the Python dependencies: +### Supported Quantizations -``` -python3 -m venv llm/llama.cpp/.venv -source llm/llama.cpp/.venv/bin/activate -pip install -r llm/llama.cpp/requirements.txt +
+#### Legacy Quantization
+
+- `Q4_0`
+- `Q4_1`
+- `Q5_0`
+- `Q5_1`
+- `Q8_0`
+ +
+#### K-means Quantization
+
+- `Q3_K_S`
+- `Q3_K_M`
+- `Q3_K_L`
+- `Q4_K_S`
+- `Q4_K_M`
+- `Q5_K_S`
+- `Q5_K_M`
+- `Q6_K`
+ +> [!NOTE] +> Activation-aware Weight Quantization (i.e. IQ) are not currently supported for automatic quantization however you can still import the quantized model into Ollama, see [Import GGUF](#import-gguf). + +## Template Detection + +> [!NOTE] +> Template detection requires v0.1.42 or higher. + +Ollama uses model metadata, specifically `tokenizer.chat_template`, to automatically create a template appropriate for the model you're importing. + +```dockerfile +FROM /path/to/my/gemma/model ``` -Then build the `quantize` tool: - -``` -make -C llm/llama.cpp quantize +```shell +$ ollama create mymodel +transferring model data +using autodetected template gemma-instruct +creating new layer sha256:baa2a0edc27d19cc6b7537578a9a7ba1a4e3214dc185ed5ae43692b319af7b84 +creating new layer sha256:ba66c3309914dbef07e5149a648fd1877f030d337a4f240d444ea335008943cb +writing manifest +success ``` -### Clone the HuggingFace repository (optional) - -If the model is currently hosted in a HuggingFace repository, first clone that repository to download the raw model. - -Install [Git LFS](https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage), verify it's installed, and then clone the model's repository: - -``` -git lfs install -git clone https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 model -``` - -### Convert the model - -> Note: some model architectures require using specific convert scripts. For example, Qwen models require running `convert-hf-to-gguf.py` instead of `convert.py` - -``` -python llm/llama.cpp/convert.py ./model --outtype f16 --outfile converted.bin -``` - -### Quantize the model - -``` -llm/llama.cpp/quantize converted.bin quantized.bin q4_0 -``` - -### Step 3: Write a `Modelfile` - -Next, create a `Modelfile` for your model: - -``` -FROM quantized.bin -TEMPLATE "[INST] {{ .Prompt }} [/INST]" -``` - -### Step 4: Create the Ollama model - -Finally, create a model from your `Modelfile`: - -``` -ollama create example -f Modelfile -``` - -### Step 5: Run your model - -Next, test the model with `ollama run`: - -``` -ollama run example "What is your favourite condiment?" -``` - -## Publishing your model (optional – early alpha) - -Publishing models is in early alpha. If you'd like to publish your model to share with others, follow these steps: - -1. Create [an account](https://ollama.com/signup) -2. Copy your Ollama public key: - - macOS: `cat ~/.ollama/id_ed25519.pub | pbcopy` - - Windows: `type %USERPROFILE%\.ollama\id_ed25519.pub` - - Linux: `cat /usr/share/ollama/.ollama/id_ed25519.pub` -3. Add your public key to your [Ollama account](https://ollama.com/settings/keys) - -Next, copy your model to your username's namespace: - -``` -ollama cp example /example -``` - -> Note: model names may only contain lowercase letters, digits, and the characters `.`, `-`, and `_`. - -Then push the model: - -``` -ollama push /example -``` - -After publishing, your model will be available at `https://ollama.com//example`. - -## Quantization reference - -The quantization options are as follow (from highest highest to lowest levels of quantization). Note: some architectures such as Falcon do not support K quants. 
- -- `q2_K` -- `q3_K` -- `q3_K_S` -- `q3_K_M` -- `q3_K_L` -- `q4_0` (recommended) -- `q4_1` -- `q4_K` -- `q4_K_S` -- `q4_K_M` -- `q5_0` -- `q5_1` -- `q5_K` -- `q5_K_S` -- `q5_K_M` -- `q6_K` -- `q8_0` -- `f16` +Defining a template in the Modelfile will disable this feature which may be useful if you want to use a different template than the autodetected one. From 620d5c569e965ac93ac5c58bca5d3d8938cb98bc Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Sat, 8 Jun 2024 12:32:02 -0700 Subject: [PATCH 03/13] fix parsing big endian gguf --- llm/ggml.go | 13 +++++-------- llm/gguf.go | 15 ++++++++++++++- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/llm/ggml.go b/llm/ggml.go index 645447d5..16da4c9d 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -231,8 +231,7 @@ const ( // Magic constant for `ggla` files (LoRA adapter). FILE_MAGIC_GGLA = 0x67676C61 // Magic constant for `gguf` files (versioned, gguf) - FILE_MAGIC_GGUF_LE = 0x46554747 - FILE_MAGIC_GGUF_BE = 0x47475546 + FILE_MAGIC_GGUF = 0x46554747 ) var ErrUnsupportedFormat = errors.New("unsupported model format") @@ -247,7 +246,7 @@ func DetectGGMLType(b []byte) string { return "ggjt" case FILE_MAGIC_GGLA: return "ggla" - case FILE_MAGIC_GGUF_LE, FILE_MAGIC_GGUF_BE: + case FILE_MAGIC_GGUF: return "gguf" default: return "" @@ -255,21 +254,19 @@ func DetectGGMLType(b []byte) string { } func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) { - var magic uint32 + var magic [4]byte if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil { return nil, 0, err } var c container - switch magic { + switch binary.LittleEndian.Uint32(magic[:]) { case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT: return nil, 0, ErrUnsupportedFormat case FILE_MAGIC_GGLA: c = &containerGGLA{} - case FILE_MAGIC_GGUF_LE: + case FILE_MAGIC_GGUF: c = &containerGGUF{ByteOrder: binary.LittleEndian} - case FILE_MAGIC_GGUF_BE: - c = &containerGGUF{ByteOrder: binary.BigEndian} default: return nil, 0, errors.New("invalid file magic") } diff --git a/llm/gguf.go b/llm/gguf.go index 234efe57..8c64e166 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -36,10 +36,23 @@ func (c *containerGGUF) Name() string { } func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) { - if err := binary.Read(rs, c.ByteOrder, &c.Version); err != nil { + var version [4]byte + if err := binary.Read(rs, c.ByteOrder, &version); err != nil { return nil, err } + // if the lower 16 bits are 0, the byte order is probably wrong + if c.ByteOrder.Uint32(version[:])&1<<4 == 0 { + switch c.ByteOrder { + case binary.LittleEndian: + c.ByteOrder = binary.BigEndian + case binary.BigEndian: + c.ByteOrder = binary.LittleEndian + } + } + + c.Version = c.ByteOrder.Uint32(version[:]) + var err error switch c.Version { case 1: From 943172cbf4d6cc0b8682021bfc9c2d816152615d Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Sat, 8 Jun 2024 23:04:32 -0700 Subject: [PATCH 04/13] Update api.md --- docs/api.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/api.md b/docs/api.md index 64bfbed8..f761f68c 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1044,11 +1044,10 @@ GET /api/ps List models that are currently loaded into memory. -\* If a model is loaded completely into system memory, `size_vram` is omitted from the response. - #### Examples ### Request + ```shell curl http://localhost:11434/api/ps ``` @@ -1080,4 +1079,4 @@ A single JSON object will be returned. 
} ] } -``` \ No newline at end of file +``` From 5528dd9d1170e7a78a4fdb7684e8944e2052ca8f Mon Sep 17 00:00:00 2001 From: dcasota <14890243+dcasota@users.noreply.github.com> Date: Sun, 9 Jun 2024 19:41:07 +0200 Subject: [PATCH 05/13] Error handling load_single_document() in ingest.py (#4852) load_single_document() handles - corrupt files - empty (zero byte) files - unsupported file extensions --- .../langchain-python-rag-privategpt/ingest.py | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/examples/langchain-python-rag-privategpt/ingest.py b/examples/langchain-python-rag-privategpt/ingest.py index 35324775..0f71ccf0 100755 --- a/examples/langchain-python-rag-privategpt/ingest.py +++ b/examples/langchain-python-rag-privategpt/ingest.py @@ -77,13 +77,21 @@ LOADER_MAPPING = { def load_single_document(file_path: str) -> List[Document]: - ext = "." + file_path.rsplit(".", 1)[-1] - if ext in LOADER_MAPPING: - loader_class, loader_args = LOADER_MAPPING[ext] - loader = loader_class(file_path, **loader_args) - return loader.load() + if os.path.getsize(file_path) != 0: + filename, ext = os.path.splitext(file_path) + if ext in LOADER_MAPPING: + loader_class, loader_args = LOADER_MAPPING[ext] + try: + loader = loader_class(file_path, **loader_args) + if loader: + return loader.load() + except: + print(f"Corrupted file {file_path}. Ignoring it.") + else: + print(f"Unsupported file {file_path}. Ignoring it.") + else: + print(f"Empty file {file_path}. Ignoring it.") - raise ValueError(f"Unsupported file extension '{ext}'") def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]: """ @@ -100,7 +108,8 @@ def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Docum results = [] with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar: for i, docs in enumerate(pool.imap_unordered(load_single_document, filtered_files)): - results.extend(docs) + if docs: + results.extend(docs) pbar.update() return results From 896495de7b5814cca32ba83d2e9f2bf176ba98c8 Mon Sep 17 00:00:00 2001 From: Napuh <55241721+Napuh@users.noreply.github.com> Date: Sun, 9 Jun 2024 19:49:03 +0200 Subject: [PATCH 06/13] Add instructions to easily install specific versions on faq.md (#4084) * Added instructions to easily install specific versions on faq.md * Small typo * Moved instructions on how to install specific version to linux.md * Update docs/linux.md * Update docs/linux.md --------- Co-authored-by: Jeffrey Morgan --- docs/linux.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/linux.md b/docs/linux.md index 9e7e06fa..ec730656 100644 --- a/docs/linux.md +++ b/docs/linux.md @@ -100,6 +100,16 @@ sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama sudo chmod +x /usr/bin/ollama ``` +## Installing specific versions + +Use `OLLAMA_VERSION` environment variable with the install script to install a specific version of Ollama, including pre-releases. You can find the version numbers in the [releases page](https://github.com/ollama/ollama/releases). + +For example: + +``` +curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.1.32 sh +``` + ## Viewing logs To view logs of Ollama running as a startup service, run: From b84aea1685329f107c5547b1cca4efcf15c19f52 Mon Sep 17 00:00:00 2001 From: Craig Hughes Date: Sun, 9 Jun 2024 13:57:09 -0400 Subject: [PATCH 07/13] Critical fix from llama.cpp JSON grammar to forbid un-escaped escape characters inside strings, which breaks parsing. 
(#3782) --- llm/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llm/server.go b/llm/server.go index 6cb01fa0..0a815798 100644 --- a/llm/server.go +++ b/llm/server.go @@ -606,7 +606,7 @@ array ::= string ::= "\"" ( - [^"\\] | + [^"\\\x7F\x00-\x1F] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes )* "\"" ws From 94d37fdcae30ddeb6c9f65c8707004f5ec9eaf33 Mon Sep 17 00:00:00 2001 From: Jim Scardelis Date: Sun, 9 Jun 2024 10:58:09 -0700 Subject: [PATCH 08/13] fix: examples/langchain-python-rag-privategpt/requirements.txt (#3382) --- examples/langchain-python-rag-privategpt/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/langchain-python-rag-privategpt/requirements.txt b/examples/langchain-python-rag-privategpt/requirements.txt index d4c5cb2e..0aad1fe5 100644 --- a/examples/langchain-python-rag-privategpt/requirements.txt +++ b/examples/langchain-python-rag-privategpt/requirements.txt @@ -11,4 +11,5 @@ tabulate==0.9.0 pandoc==2.3 pypandoc==1.11 tqdm==4.66.1 -sentence_transformers==2.2.2 \ No newline at end of file +sentence_transformers==2.2.2 +numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability \ No newline at end of file From b27268aaefa03f401f0f09c1e21b63872288ddc7 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 10 Jun 2024 11:31:34 -0700 Subject: [PATCH 09/13] add test --- server/routes_delete_test.go | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/server/routes_delete_test.go b/server/routes_delete_test.go index d0990009..0f003574 100644 --- a/server/routes_delete_test.go +++ b/server/routes_delete_test.go @@ -1,12 +1,15 @@ package server import ( + "bytes" + "encoding/json" "fmt" "net/http" "path/filepath" "testing" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/types/model" ) func TestDelete(t *testing.T) { @@ -69,3 +72,33 @@ func TestDelete(t *testing.T) { checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{}) checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{}) } + +func TestDeleteDuplicateLayers(t *testing.T) { + p := t.TempDir() + t.Setenv("OLLAMA_MODELS", p) + var s Server + + n := model.ParseName("test") + + var b bytes.Buffer + if err := json.NewEncoder(&b).Encode(&ConfigV2{}); err != nil { + t.Fatal(err) + } + + config, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json") + if err != nil { + t.Fatal(err) + } + + // create a manifest with duplicate layers + if err := WriteManifest(n, config, []*Layer{config}); err != nil { + t.Fatal(err) + } + + w := createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"}) + if w.Code != http.StatusOK { + t.Errorf("expected status code 200, actual %d", w.Code) + } + + checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{}) +} From 515f497e6d5c0a102f913693fc8d4ed5eefd4ebb Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 10 Jun 2024 11:15:03 -0700 Subject: [PATCH 10/13] fix: skip removing layers that no longer exist --- server/manifest.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/manifest.go b/server/manifest.go index d0675724..61dd1ab4 100644 --- a/server/manifest.go +++ b/server/manifest.go @@ -3,6 +3,7 @@ package server import ( "crypto/sha256" "encoding/json" + "errors" "fmt" "io" "log/slog" @@ -43,7 +44,9 @@ func (m *Manifest) Remove() error { func (m *Manifest) RemoveLayers() error { for _, layer := range append(m.Layers, 
m.Config) { - if err := layer.Remove(); err != nil { + if err := layer.Remove(); errors.Is(err, os.ErrNotExist) { + slog.Debug("layer does not exist", "digest", layer.Digest) + } else if err != nil { return err } } From 2ff45d571de4463fcebf779373ae7337cf969ebf Mon Sep 17 00:00:00 2001 From: James Montgomery Date: Tue, 11 Jun 2024 14:15:05 -0400 Subject: [PATCH 11/13] Add Ollama-hpp to Community Libraries in README. (#4983) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d29d04f7..2fdc63cb 100644 --- a/README.md +++ b/README.md @@ -332,6 +332,7 @@ See the [API documentation](./docs/api.md) for all endpoints. - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp) - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai) - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs) +- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp) - [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j) - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama) - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit) From ead259d877fc8b20f7943f1f9e8eeaae0acfa52a Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Tue, 11 Jun 2024 14:24:41 -0700 Subject: [PATCH 12/13] llm: fix seed value not being applied to requests (#4986) --- docs/api.md | 5 ++--- llm/ext_server/server.cpp | 7 +------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/docs/api.md b/docs/api.md index f761f68c..35f1def3 100644 --- a/docs/api.md +++ b/docs/api.md @@ -250,7 +250,7 @@ curl http://localhost:11434/api/generate -d '{ #### Request (Reproducible outputs) -For reproducible outputs, set `temperature` to 0 and `seed` to a number: +For reproducible outputs, set `seed` to a number: ##### Request @@ -259,8 +259,7 @@ curl http://localhost:11434/api/generate -d '{ "model": "mistral", "prompt": "Why is the sky blue?", "options": { - "seed": 123, - "temperature": 0 + "seed": 123 } }' ``` diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp index 7d14e48e..93e71562 100644 --- a/llm/ext_server/server.cpp +++ b/llm/ext_server/server.cpp @@ -359,7 +359,6 @@ struct llama_server_context // slots / clients std::vector slots; - json default_generation_settings_for_props; llama_server_queue queue_tasks; llama_server_response queue_results; @@ -483,9 +482,6 @@ struct llama_server_context slots.push_back(slot); } - default_generation_settings_for_props = get_formated_generation(slots.front()); - default_generation_settings_for_props["seed"] = -1; - batch = llama_batch_init(n_ctx, 0, params.n_parallel); } @@ -584,7 +580,7 @@ struct llama_server_context slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl); slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); - slot->params.seed = json_value(data, "seed", default_params.seed); + slot->sparams.seed = json_value(data, "seed", default_params.seed); slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs); slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep); @@ -811,7 +807,6 @@ struct llama_server_context llama_sampling_free(slot->ctx_sampling); } slot->ctx_sampling = llama_sampling_init(slot->sparams); - llama_set_rng_seed(ctx, slot->params.seed); slot->command = LOAD_PROMPT; 
all_slots_are_idle = false; From 7bdcd1da9417ecc1a1069f2d73f0b88a3a43857d Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Tue, 11 Jun 2024 15:55:44 -0700 Subject: [PATCH 13/13] Revert "Merge pull request #4938 from ollama/mxyng/fix-byte-order" This reverts commit f5f245cc154580fa7b4052c001d2a7e3d771cfb8, reversing changes made to 94d37fdcae30ddeb6c9f65c8707004f5ec9eaf33. this change broke gguf v2 which is incorrectly detected as big endian --- llm/ggml.go | 13 ++++++++----- llm/gguf.go | 15 +-------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/llm/ggml.go b/llm/ggml.go index 16da4c9d..645447d5 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -231,7 +231,8 @@ const ( // Magic constant for `ggla` files (LoRA adapter). FILE_MAGIC_GGLA = 0x67676C61 // Magic constant for `gguf` files (versioned, gguf) - FILE_MAGIC_GGUF = 0x46554747 + FILE_MAGIC_GGUF_LE = 0x46554747 + FILE_MAGIC_GGUF_BE = 0x47475546 ) var ErrUnsupportedFormat = errors.New("unsupported model format") @@ -246,7 +247,7 @@ func DetectGGMLType(b []byte) string { return "ggjt" case FILE_MAGIC_GGLA: return "ggla" - case FILE_MAGIC_GGUF: + case FILE_MAGIC_GGUF_LE, FILE_MAGIC_GGUF_BE: return "gguf" default: return "" @@ -254,19 +255,21 @@ func DetectGGMLType(b []byte) string { } func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) { - var magic [4]byte + var magic uint32 if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil { return nil, 0, err } var c container - switch binary.LittleEndian.Uint32(magic[:]) { + switch magic { case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT: return nil, 0, ErrUnsupportedFormat case FILE_MAGIC_GGLA: c = &containerGGLA{} - case FILE_MAGIC_GGUF: + case FILE_MAGIC_GGUF_LE: c = &containerGGUF{ByteOrder: binary.LittleEndian} + case FILE_MAGIC_GGUF_BE: + c = &containerGGUF{ByteOrder: binary.BigEndian} default: return nil, 0, errors.New("invalid file magic") } diff --git a/llm/gguf.go b/llm/gguf.go index 8c64e166..234efe57 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -36,23 +36,10 @@ func (c *containerGGUF) Name() string { } func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) { - var version [4]byte - if err := binary.Read(rs, c.ByteOrder, &version); err != nil { + if err := binary.Read(rs, c.ByteOrder, &c.Version); err != nil { return nil, err } - // if the lower 16 bits are 0, the byte order is probably wrong - if c.ByteOrder.Uint32(version[:])&1<<4 == 0 { - switch c.ByteOrder { - case binary.LittleEndian: - c.ByteOrder = binary.BigEndian - case binary.BigEndian: - c.ByteOrder = binary.LittleEndian - } - } - - c.Version = c.ByteOrder.Uint32(version[:]) - var err error switch c.Version { case 1: