mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-22 06:43:57 +00:00
Compare commits
24 Commits
v0.1.42-al
...
v0.1.43-al
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ff50cfb582 | ||
|
|
c69bc19e46 | ||
|
|
bba5d177aa | ||
|
|
c16f8af911 | ||
|
|
edaec3183a | ||
|
|
217f60c3d9 | ||
|
|
7bdcd1da94 | ||
|
|
ead259d877 | ||
|
|
2ff45d571d | ||
|
|
0f3cf1d42e | ||
|
|
5bc029c529 | ||
|
|
e9a9c6a8e8 | ||
|
|
515f497e6d | ||
|
|
b27268aaef | ||
|
|
f5f245cc15 | ||
|
|
94d37fdcae | ||
|
|
b84aea1685 | ||
|
|
896495de7b | ||
|
|
5528dd9d11 | ||
|
|
943172cbf4 | ||
|
|
1b5848cbf2 | ||
|
|
620d5c569e | ||
|
|
b9ce7bf75e | ||
|
|
66ab48772f |
@@ -350,6 +350,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
|
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
|
||||||
- [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
|
- [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
|
||||||
- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
|
- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
|
||||||
|
- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
|
||||||
- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
|
- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
|
||||||
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
|
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
|
||||||
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
|
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
|
||||||
|
|||||||
@@ -23,11 +23,9 @@ import (
|
|||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
)
|
)
|
||||||
@@ -65,10 +63,7 @@ func checkError(resp *http.Response, body []byte) error {
|
|||||||
// If the variable is not specified, a default ollama host and port will be
|
// If the variable is not specified, a default ollama host and port will be
|
||||||
// used.
|
// used.
|
||||||
func ClientFromEnvironment() (*Client, error) {
|
func ClientFromEnvironment() (*Client, error) {
|
||||||
ollamaHost, err := GetOllamaHost()
|
ollamaHost := envconfig.Host
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return &Client{
|
return &Client{
|
||||||
base: &url.URL{
|
base: &url.URL{
|
||||||
@@ -79,52 +74,6 @@ func ClientFromEnvironment() (*Client, error) {
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type OllamaHost struct {
|
|
||||||
Scheme string
|
|
||||||
Host string
|
|
||||||
Port string
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetOllamaHost() (OllamaHost, error) {
|
|
||||||
defaultPort := "11434"
|
|
||||||
|
|
||||||
hostVar := os.Getenv("OLLAMA_HOST")
|
|
||||||
hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
|
|
||||||
|
|
||||||
scheme, hostport, ok := strings.Cut(hostVar, "://")
|
|
||||||
switch {
|
|
||||||
case !ok:
|
|
||||||
scheme, hostport = "http", hostVar
|
|
||||||
case scheme == "http":
|
|
||||||
defaultPort = "80"
|
|
||||||
case scheme == "https":
|
|
||||||
defaultPort = "443"
|
|
||||||
}
|
|
||||||
|
|
||||||
// trim trailing slashes
|
|
||||||
hostport = strings.TrimRight(hostport, "/")
|
|
||||||
|
|
||||||
host, port, err := net.SplitHostPort(hostport)
|
|
||||||
if err != nil {
|
|
||||||
host, port = "127.0.0.1", defaultPort
|
|
||||||
if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
|
|
||||||
host = ip.String()
|
|
||||||
} else if hostport != "" {
|
|
||||||
host = hostport
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
|
|
||||||
return OllamaHost{}, ErrInvalidHostPort
|
|
||||||
}
|
|
||||||
|
|
||||||
return OllamaHost{
|
|
||||||
Scheme: scheme,
|
|
||||||
Host: host,
|
|
||||||
Port: port,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewClient(base *url.URL, http *http.Client) *Client {
|
func NewClient(base *url.URL, http *http.Client) *Client {
|
||||||
return &Client{
|
return &Client{
|
||||||
base: base,
|
base: base,
|
||||||
|
|||||||
@@ -1,11 +1,9 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"net"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/ollama/ollama/envconfig"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestClientFromEnvironment(t *testing.T) {
|
func TestClientFromEnvironment(t *testing.T) {
|
||||||
@@ -35,6 +33,7 @@ func TestClientFromEnvironment(t *testing.T) {
|
|||||||
for k, v := range testCases {
|
for k, v := range testCases {
|
||||||
t.Run(k, func(t *testing.T) {
|
t.Run(k, func(t *testing.T) {
|
||||||
t.Setenv("OLLAMA_HOST", v.value)
|
t.Setenv("OLLAMA_HOST", v.value)
|
||||||
|
envconfig.LoadConfig()
|
||||||
|
|
||||||
client, err := ClientFromEnvironment()
|
client, err := ClientFromEnvironment()
|
||||||
if err != v.err {
|
if err != v.err {
|
||||||
@@ -46,40 +45,4 @@ func TestClientFromEnvironment(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
hostTestCases := map[string]*testCase{
|
|
||||||
"empty": {value: "", expect: "127.0.0.1:11434"},
|
|
||||||
"only address": {value: "1.2.3.4", expect: "1.2.3.4:11434"},
|
|
||||||
"only port": {value: ":1234", expect: ":1234"},
|
|
||||||
"address and port": {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
|
|
||||||
"hostname": {value: "example.com", expect: "example.com:11434"},
|
|
||||||
"hostname and port": {value: "example.com:1234", expect: "example.com:1234"},
|
|
||||||
"zero port": {value: ":0", expect: ":0"},
|
|
||||||
"too large port": {value: ":66000", err: ErrInvalidHostPort},
|
|
||||||
"too small port": {value: ":-1", err: ErrInvalidHostPort},
|
|
||||||
"ipv6 localhost": {value: "[::1]", expect: "[::1]:11434"},
|
|
||||||
"ipv6 world open": {value: "[::]", expect: "[::]:11434"},
|
|
||||||
"ipv6 no brackets": {value: "::1", expect: "[::1]:11434"},
|
|
||||||
"ipv6 + port": {value: "[::1]:1337", expect: "[::1]:1337"},
|
|
||||||
"extra space": {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
|
|
||||||
"extra quotes": {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
|
|
||||||
"extra space+quotes": {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
|
|
||||||
"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
|
|
||||||
}
|
|
||||||
|
|
||||||
for k, v := range hostTestCases {
|
|
||||||
t.Run(k, func(t *testing.T) {
|
|
||||||
t.Setenv("OLLAMA_HOST", v.value)
|
|
||||||
|
|
||||||
oh, err := GetOllamaHost()
|
|
||||||
if err != v.err {
|
|
||||||
t.Fatalf("expected %s, got %s", v.err, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err == nil {
|
|
||||||
host := net.JoinHostPort(oh.Host, oh.Port)
|
|
||||||
assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package api
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"math"
|
"math"
|
||||||
@@ -377,8 +376,6 @@ func (m *Metrics) Summary() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
|
|
||||||
|
|
||||||
func (opts *Options) FromMap(m map[string]interface{}) error {
|
func (opts *Options) FromMap(m map[string]interface{}) error {
|
||||||
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
|
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
|
||||||
typeOpts := reflect.TypeOf(opts).Elem() // types of the fields in the options struct
|
typeOpts := reflect.TypeOf(opts).Elem() // types of the fields in the options struct
|
||||||
|
|||||||
@@ -960,17 +960,11 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func RunServer(cmd *cobra.Command, _ []string) error {
|
func RunServer(cmd *cobra.Command, _ []string) error {
|
||||||
// retrieve the OLLAMA_HOST environment variable
|
|
||||||
ollamaHost, err := api.GetOllamaHost()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := initializeKeypair(); err != nil {
|
if err := initializeKeypair(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
ln, err := net.Listen("tcp", net.JoinHostPort(ollamaHost.Host, ollamaHost.Port))
|
ln, err := net.Listen("tcp", net.JoinHostPort(envconfig.Host.Host, envconfig.Host.Port))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -250,7 +250,7 @@ curl http://localhost:11434/api/generate -d '{
|
|||||||
|
|
||||||
#### Request (Reproducible outputs)
|
#### Request (Reproducible outputs)
|
||||||
|
|
||||||
For reproducible outputs, set `temperature` to 0 and `seed` to a number:
|
For reproducible outputs, set `seed` to a number:
|
||||||
|
|
||||||
##### Request
|
##### Request
|
||||||
|
|
||||||
@@ -259,8 +259,7 @@ curl http://localhost:11434/api/generate -d '{
|
|||||||
"model": "mistral",
|
"model": "mistral",
|
||||||
"prompt": "Why is the sky blue?",
|
"prompt": "Why is the sky blue?",
|
||||||
"options": {
|
"options": {
|
||||||
"seed": 123,
|
"seed": 123
|
||||||
"temperature": 0
|
|
||||||
}
|
}
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
@@ -1044,11 +1043,10 @@ GET /api/ps
|
|||||||
|
|
||||||
List models that are currently loaded into memory.
|
List models that are currently loaded into memory.
|
||||||
|
|
||||||
\* If a model is loaded completely into system memory, `size_vram` is omitted from the response.
|
|
||||||
|
|
||||||
#### Examples
|
#### Examples
|
||||||
|
|
||||||
### Request
|
### Request
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:11434/api/ps
|
curl http://localhost:11434/api/ps
|
||||||
```
|
```
|
||||||
|
|||||||
227
docs/import.md
227
docs/import.md
@@ -1,170 +1,99 @@
|
|||||||
# Import a model
|
# Import
|
||||||
|
|
||||||
This guide walks through importing a GGUF, PyTorch or Safetensors model.
|
GGUF models and select Safetensors models can be imported directly into Ollama.
|
||||||
|
|
||||||
## Importing (GGUF)
|
## Import GGUF
|
||||||
|
|
||||||
### Step 1: Write a `Modelfile`
|
A binary GGUF file can be imported directly into Ollama through a Modelfile.
|
||||||
|
|
||||||
Start by creating a `Modelfile`. This file is the blueprint for your model, specifying weights, parameters, prompt templates and more.
|
```dockerfile
|
||||||
|
FROM /path/to/file.gguf
|
||||||
```
|
|
||||||
FROM ./mistral-7b-v0.1.Q4_0.gguf
|
|
||||||
```
|
```
|
||||||
|
|
||||||
(Optional) many chat models require a prompt template in order to answer correctly. A default prompt template can be specified with the `TEMPLATE` instruction in the `Modelfile`:
|
## Import Safetensors
|
||||||
|
|
||||||
```
|
If the model being imported is one of these architectures, it can be imported directly into Ollama through a Modelfile:
|
||||||
FROM ./mistral-7b-v0.1.Q4_0.gguf
|
|
||||||
TEMPLATE "[INST] {{ .Prompt }} [/INST]"
|
- LlamaForCausalLM
|
||||||
|
- MistralForCausalLM
|
||||||
|
- GemmaForCausalLM
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM /path/to/safetensors/directory
|
||||||
```
|
```
|
||||||
|
|
||||||
### Step 2: Create the Ollama model
|
For architectures not directly convertable by Ollama, see llama.cpp's [guide](https://github.com/ggerganov/llama.cpp/blob/master/README.md#prepare-and-quantize) on conversion. After conversion, see [Import GGUF](#import-gguf).
|
||||||
|
|
||||||
Finally, create a model from your `Modelfile`:
|
## Automatic Quantization
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> Automatic quantization requires v0.1.35 or higher.
|
||||||
|
|
||||||
|
Ollama is capable of quantizing FP16 or FP32 models to any of the supported quantizations with the `-q/--quantize` flag in `ollama create`.
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM /path/to/my/gemma/f16/model
|
||||||
```
|
```
|
||||||
ollama create example -f Modelfile
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 3: Run your model
|
|
||||||
|
|
||||||
Next, test the model with `ollama run`:
|
|
||||||
|
|
||||||
```
|
|
||||||
ollama run example "What is your favourite condiment?"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Importing (PyTorch & Safetensors)
|
|
||||||
|
|
||||||
> Importing from PyTorch and Safetensors is a longer process than importing from GGUF. Improvements that make it easier are a work in progress.
|
|
||||||
|
|
||||||
### Setup
|
|
||||||
|
|
||||||
First, clone the `ollama/ollama` repo:
|
|
||||||
|
|
||||||
```
|
|
||||||
git clone git@github.com:ollama/ollama.git ollama
|
|
||||||
cd ollama
|
|
||||||
```
|
|
||||||
|
|
||||||
and then fetch its `llama.cpp` submodule:
|
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
git submodule init
|
$ ollama create -q Q4_K_M mymodel
|
||||||
git submodule update llm/llama.cpp
|
transferring model data
|
||||||
|
quantizing F16 model to Q4_K_M
|
||||||
|
creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd
|
||||||
|
creating new layer sha256:0853f0ad24e5865173bbf9ffcc7b0f5d56b66fd690ab1009867e45e7d2c4db0f
|
||||||
|
writing manifest
|
||||||
|
success
|
||||||
```
|
```
|
||||||
|
|
||||||
Next, install the Python dependencies:
|
### Supported Quantizations
|
||||||
|
|
||||||
```
|
<details>
|
||||||
python3 -m venv llm/llama.cpp/.venv
|
<summary>Legacy Quantization</summary>
|
||||||
source llm/llama.cpp/.venv/bin/activate
|
|
||||||
pip install -r llm/llama.cpp/requirements.txt
|
- `Q4_0`
|
||||||
|
- `Q4_1`
|
||||||
|
- `Q5_0`
|
||||||
|
- `Q5_1`
|
||||||
|
- `Q8_0`
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>K-means Quantization</summary>`
|
||||||
|
|
||||||
|
- `Q3_K_S`
|
||||||
|
- `Q3_K_M`
|
||||||
|
- `Q3_K_L`
|
||||||
|
- `Q4_K_S`
|
||||||
|
- `Q4_K_M`
|
||||||
|
- `Q5_K_S`
|
||||||
|
- `Q5_K_M`
|
||||||
|
- `Q6_K`
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> Activation-aware Weight Quantization (i.e. IQ) are not currently supported for automatic quantization however you can still import the quantized model into Ollama, see [Import GGUF](#import-gguf).
|
||||||
|
|
||||||
|
## Template Detection
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> Template detection requires v0.1.42 or higher.
|
||||||
|
|
||||||
|
Ollama uses model metadata, specifically `tokenizer.chat_template`, to automatically create a template appropriate for the model you're importing.
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM /path/to/my/gemma/model
|
||||||
```
|
```
|
||||||
|
|
||||||
Then build the `quantize` tool:
|
```shell
|
||||||
|
$ ollama create mymodel
|
||||||
```
|
transferring model data
|
||||||
make -C llm/llama.cpp quantize
|
using autodetected template gemma-instruct
|
||||||
|
creating new layer sha256:baa2a0edc27d19cc6b7537578a9a7ba1a4e3214dc185ed5ae43692b319af7b84
|
||||||
|
creating new layer sha256:ba66c3309914dbef07e5149a648fd1877f030d337a4f240d444ea335008943cb
|
||||||
|
writing manifest
|
||||||
|
success
|
||||||
```
|
```
|
||||||
|
|
||||||
### Clone the HuggingFace repository (optional)
|
Defining a template in the Modelfile will disable this feature which may be useful if you want to use a different template than the autodetected one.
|
||||||
|
|
||||||
If the model is currently hosted in a HuggingFace repository, first clone that repository to download the raw model.
|
|
||||||
|
|
||||||
Install [Git LFS](https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage), verify it's installed, and then clone the model's repository:
|
|
||||||
|
|
||||||
```
|
|
||||||
git lfs install
|
|
||||||
git clone https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 model
|
|
||||||
```
|
|
||||||
|
|
||||||
### Convert the model
|
|
||||||
|
|
||||||
> Note: some model architectures require using specific convert scripts. For example, Qwen models require running `convert-hf-to-gguf.py` instead of `convert.py`
|
|
||||||
|
|
||||||
```
|
|
||||||
python llm/llama.cpp/convert.py ./model --outtype f16 --outfile converted.bin
|
|
||||||
```
|
|
||||||
|
|
||||||
### Quantize the model
|
|
||||||
|
|
||||||
```
|
|
||||||
llm/llama.cpp/quantize converted.bin quantized.bin q4_0
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 3: Write a `Modelfile`
|
|
||||||
|
|
||||||
Next, create a `Modelfile` for your model:
|
|
||||||
|
|
||||||
```
|
|
||||||
FROM quantized.bin
|
|
||||||
TEMPLATE "[INST] {{ .Prompt }} [/INST]"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 4: Create the Ollama model
|
|
||||||
|
|
||||||
Finally, create a model from your `Modelfile`:
|
|
||||||
|
|
||||||
```
|
|
||||||
ollama create example -f Modelfile
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 5: Run your model
|
|
||||||
|
|
||||||
Next, test the model with `ollama run`:
|
|
||||||
|
|
||||||
```
|
|
||||||
ollama run example "What is your favourite condiment?"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Publishing your model (optional – early alpha)
|
|
||||||
|
|
||||||
Publishing models is in early alpha. If you'd like to publish your model to share with others, follow these steps:
|
|
||||||
|
|
||||||
1. Create [an account](https://ollama.com/signup)
|
|
||||||
2. Copy your Ollama public key:
|
|
||||||
- macOS: `cat ~/.ollama/id_ed25519.pub | pbcopy`
|
|
||||||
- Windows: `type %USERPROFILE%\.ollama\id_ed25519.pub`
|
|
||||||
- Linux: `cat /usr/share/ollama/.ollama/id_ed25519.pub`
|
|
||||||
3. Add your public key to your [Ollama account](https://ollama.com/settings/keys)
|
|
||||||
|
|
||||||
Next, copy your model to your username's namespace:
|
|
||||||
|
|
||||||
```
|
|
||||||
ollama cp example <your username>/example
|
|
||||||
```
|
|
||||||
|
|
||||||
> Note: model names may only contain lowercase letters, digits, and the characters `.`, `-`, and `_`.
|
|
||||||
|
|
||||||
Then push the model:
|
|
||||||
|
|
||||||
```
|
|
||||||
ollama push <your username>/example
|
|
||||||
```
|
|
||||||
|
|
||||||
After publishing, your model will be available at `https://ollama.com/<your username>/example`.
|
|
||||||
|
|
||||||
## Quantization reference
|
|
||||||
|
|
||||||
The quantization options are as follow (from highest highest to lowest levels of quantization). Note: some architectures such as Falcon do not support K quants.
|
|
||||||
|
|
||||||
- `q2_K`
|
|
||||||
- `q3_K`
|
|
||||||
- `q3_K_S`
|
|
||||||
- `q3_K_M`
|
|
||||||
- `q3_K_L`
|
|
||||||
- `q4_0` (recommended)
|
|
||||||
- `q4_1`
|
|
||||||
- `q4_K`
|
|
||||||
- `q4_K_S`
|
|
||||||
- `q4_K_M`
|
|
||||||
- `q5_0`
|
|
||||||
- `q5_1`
|
|
||||||
- `q5_K`
|
|
||||||
- `q5_K_S`
|
|
||||||
- `q5_K_M`
|
|
||||||
- `q6_K`
|
|
||||||
- `q8_0`
|
|
||||||
- `f16`
|
|
||||||
|
|||||||
@@ -100,6 +100,16 @@ sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
|
|||||||
sudo chmod +x /usr/bin/ollama
|
sudo chmod +x /usr/bin/ollama
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Installing specific versions
|
||||||
|
|
||||||
|
Use `OLLAMA_VERSION` environment variable with the install script to install a specific version of Ollama, including pre-releases. You can find the version numbers in the [releases page](https://github.com/ollama/ollama/releases).
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.1.32 sh
|
||||||
|
```
|
||||||
|
|
||||||
## Viewing logs
|
## Viewing logs
|
||||||
|
|
||||||
To view logs of Ollama running as a startup service, run:
|
To view logs of Ollama running as a startup service, run:
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package envconfig
|
package envconfig
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net"
|
"net"
|
||||||
@@ -11,6 +12,18 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type OllamaHost struct {
|
||||||
|
Scheme string
|
||||||
|
Host string
|
||||||
|
Port string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o OllamaHost) String() string {
|
||||||
|
return fmt.Sprintf("%s://%s:%s", o.Scheme, o.Host, o.Port)
|
||||||
|
}
|
||||||
|
|
||||||
|
var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// Set via OLLAMA_ORIGINS in the environment
|
// Set via OLLAMA_ORIGINS in the environment
|
||||||
AllowOrigins []string
|
AllowOrigins []string
|
||||||
@@ -34,6 +47,8 @@ var (
|
|||||||
NoPrune bool
|
NoPrune bool
|
||||||
// Set via OLLAMA_NUM_PARALLEL in the environment
|
// Set via OLLAMA_NUM_PARALLEL in the environment
|
||||||
NumParallel int
|
NumParallel int
|
||||||
|
// Set via OLLAMA_HOST in the environment
|
||||||
|
Host *OllamaHost
|
||||||
// Set via OLLAMA_RUNNERS_DIR in the environment
|
// Set via OLLAMA_RUNNERS_DIR in the environment
|
||||||
RunnersDir string
|
RunnersDir string
|
||||||
// Set via OLLAMA_TMPDIR in the environment
|
// Set via OLLAMA_TMPDIR in the environment
|
||||||
@@ -50,7 +65,7 @@ func AsMap() map[string]EnvVar {
|
|||||||
return map[string]EnvVar{
|
return map[string]EnvVar{
|
||||||
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
||||||
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
|
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
|
||||||
"OLLAMA_HOST": {"OLLAMA_HOST", "", "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
"OLLAMA_HOST": {"OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
||||||
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
|
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
|
||||||
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
|
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
|
||||||
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
|
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
|
||||||
@@ -216,4 +231,54 @@ func LoadConfig() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
KeepAlive = clean("OLLAMA_KEEP_ALIVE")
|
KeepAlive = clean("OLLAMA_KEEP_ALIVE")
|
||||||
|
|
||||||
|
var err error
|
||||||
|
Host, err = getOllamaHost()
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getOllamaHost() (*OllamaHost, error) {
|
||||||
|
defaultPort := "11434"
|
||||||
|
|
||||||
|
hostVar := os.Getenv("OLLAMA_HOST")
|
||||||
|
hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
|
||||||
|
|
||||||
|
scheme, hostport, ok := strings.Cut(hostVar, "://")
|
||||||
|
switch {
|
||||||
|
case !ok:
|
||||||
|
scheme, hostport = "http", hostVar
|
||||||
|
case scheme == "http":
|
||||||
|
defaultPort = "80"
|
||||||
|
case scheme == "https":
|
||||||
|
defaultPort = "443"
|
||||||
|
}
|
||||||
|
|
||||||
|
// trim trailing slashes
|
||||||
|
hostport = strings.TrimRight(hostport, "/")
|
||||||
|
|
||||||
|
host, port, err := net.SplitHostPort(hostport)
|
||||||
|
if err != nil {
|
||||||
|
host, port = "127.0.0.1", defaultPort
|
||||||
|
if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
|
||||||
|
host = ip.String()
|
||||||
|
} else if hostport != "" {
|
||||||
|
host = hostport
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
|
||||||
|
return &OllamaHost{
|
||||||
|
Scheme: scheme,
|
||||||
|
Host: host,
|
||||||
|
Port: defaultPort,
|
||||||
|
}, ErrInvalidHostPort
|
||||||
|
}
|
||||||
|
|
||||||
|
return &OllamaHost{
|
||||||
|
Scheme: scheme,
|
||||||
|
Host: host,
|
||||||
|
Port: port,
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,11 @@
|
|||||||
package envconfig
|
package envconfig
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -21,3 +24,48 @@ func TestConfig(t *testing.T) {
|
|||||||
LoadConfig()
|
LoadConfig()
|
||||||
require.True(t, FlashAttention)
|
require.True(t, FlashAttention)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestClientFromEnvironment(t *testing.T) {
|
||||||
|
type testCase struct {
|
||||||
|
value string
|
||||||
|
expect string
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
hostTestCases := map[string]*testCase{
|
||||||
|
"empty": {value: "", expect: "127.0.0.1:11434"},
|
||||||
|
"only address": {value: "1.2.3.4", expect: "1.2.3.4:11434"},
|
||||||
|
"only port": {value: ":1234", expect: ":1234"},
|
||||||
|
"address and port": {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
|
||||||
|
"hostname": {value: "example.com", expect: "example.com:11434"},
|
||||||
|
"hostname and port": {value: "example.com:1234", expect: "example.com:1234"},
|
||||||
|
"zero port": {value: ":0", expect: ":0"},
|
||||||
|
"too large port": {value: ":66000", err: ErrInvalidHostPort},
|
||||||
|
"too small port": {value: ":-1", err: ErrInvalidHostPort},
|
||||||
|
"ipv6 localhost": {value: "[::1]", expect: "[::1]:11434"},
|
||||||
|
"ipv6 world open": {value: "[::]", expect: "[::]:11434"},
|
||||||
|
"ipv6 no brackets": {value: "::1", expect: "[::1]:11434"},
|
||||||
|
"ipv6 + port": {value: "[::1]:1337", expect: "[::1]:1337"},
|
||||||
|
"extra space": {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
|
||||||
|
"extra quotes": {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
|
||||||
|
"extra space+quotes": {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
|
||||||
|
"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range hostTestCases {
|
||||||
|
t.Run(k, func(t *testing.T) {
|
||||||
|
t.Setenv("OLLAMA_HOST", v.value)
|
||||||
|
LoadConfig()
|
||||||
|
|
||||||
|
oh, err := getOllamaHost()
|
||||||
|
if err != v.err {
|
||||||
|
t.Fatalf("expected %s, got %s", v.err, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
host := net.JoinHostPort(oh.Host, oh.Port)
|
||||||
|
assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -77,13 +77,21 @@ LOADER_MAPPING = {
|
|||||||
|
|
||||||
|
|
||||||
def load_single_document(file_path: str) -> List[Document]:
|
def load_single_document(file_path: str) -> List[Document]:
|
||||||
ext = "." + file_path.rsplit(".", 1)[-1]
|
if os.path.getsize(file_path) != 0:
|
||||||
if ext in LOADER_MAPPING:
|
filename, ext = os.path.splitext(file_path)
|
||||||
loader_class, loader_args = LOADER_MAPPING[ext]
|
if ext in LOADER_MAPPING:
|
||||||
loader = loader_class(file_path, **loader_args)
|
loader_class, loader_args = LOADER_MAPPING[ext]
|
||||||
return loader.load()
|
try:
|
||||||
|
loader = loader_class(file_path, **loader_args)
|
||||||
|
if loader:
|
||||||
|
return loader.load()
|
||||||
|
except:
|
||||||
|
print(f"Corrupted file {file_path}. Ignoring it.")
|
||||||
|
else:
|
||||||
|
print(f"Unsupported file {file_path}. Ignoring it.")
|
||||||
|
else:
|
||||||
|
print(f"Empty file {file_path}. Ignoring it.")
|
||||||
|
|
||||||
raise ValueError(f"Unsupported file extension '{ext}'")
|
|
||||||
|
|
||||||
def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]:
|
def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]:
|
||||||
"""
|
"""
|
||||||
@@ -100,7 +108,8 @@ def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Docum
|
|||||||
results = []
|
results = []
|
||||||
with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar:
|
with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar:
|
||||||
for i, docs in enumerate(pool.imap_unordered(load_single_document, filtered_files)):
|
for i, docs in enumerate(pool.imap_unordered(load_single_document, filtered_files)):
|
||||||
results.extend(docs)
|
if docs:
|
||||||
|
results.extend(docs)
|
||||||
pbar.update()
|
pbar.update()
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|||||||
@@ -12,3 +12,4 @@ pandoc==2.3
|
|||||||
pypandoc==1.11
|
pypandoc==1.11
|
||||||
tqdm==4.66.1
|
tqdm==4.66.1
|
||||||
sentence_transformers==2.2.2
|
sentence_transformers==2.2.2
|
||||||
|
numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability
|
||||||
7
llm/ext_server/server.cpp
vendored
7
llm/ext_server/server.cpp
vendored
@@ -359,7 +359,6 @@ struct llama_server_context
|
|||||||
|
|
||||||
// slots / clients
|
// slots / clients
|
||||||
std::vector<server_slot> slots;
|
std::vector<server_slot> slots;
|
||||||
json default_generation_settings_for_props;
|
|
||||||
|
|
||||||
llama_server_queue queue_tasks;
|
llama_server_queue queue_tasks;
|
||||||
llama_server_response queue_results;
|
llama_server_response queue_results;
|
||||||
@@ -483,9 +482,6 @@ struct llama_server_context
|
|||||||
slots.push_back(slot);
|
slots.push_back(slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
default_generation_settings_for_props = get_formated_generation(slots.front());
|
|
||||||
default_generation_settings_for_props["seed"] = -1;
|
|
||||||
|
|
||||||
batch = llama_batch_init(n_ctx, 0, params.n_parallel);
|
batch = llama_batch_init(n_ctx, 0, params.n_parallel);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -584,7 +580,7 @@ struct llama_server_context
|
|||||||
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
||||||
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
||||||
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
||||||
slot->params.seed = json_value(data, "seed", default_params.seed);
|
slot->sparams.seed = json_value(data, "seed", default_params.seed);
|
||||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||||
@@ -811,7 +807,6 @@ struct llama_server_context
|
|||||||
llama_sampling_free(slot->ctx_sampling);
|
llama_sampling_free(slot->ctx_sampling);
|
||||||
}
|
}
|
||||||
slot->ctx_sampling = llama_sampling_init(slot->sparams);
|
slot->ctx_sampling = llama_sampling_init(slot->sparams);
|
||||||
llama_set_rng_seed(ctx, slot->params.seed);
|
|
||||||
slot->command = LOAD_PROMPT;
|
slot->command = LOAD_PROMPT;
|
||||||
|
|
||||||
all_slots_are_idle = false;
|
all_slots_are_idle = false;
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ function amdGPUs {
|
|||||||
"gfx902"
|
"gfx902"
|
||||||
"gfx904"
|
"gfx904"
|
||||||
"gfx90c"
|
"gfx90c"
|
||||||
"gfx906"
|
|
||||||
"gfx906:xnack-"
|
"gfx906:xnack-"
|
||||||
"gfx908:xnack-"
|
"gfx908:xnack-"
|
||||||
"gfx90a:xnack+"
|
"gfx90a:xnack+"
|
||||||
|
|||||||
@@ -606,7 +606,7 @@ array ::=
|
|||||||
|
|
||||||
string ::=
|
string ::=
|
||||||
"\"" (
|
"\"" (
|
||||||
[^"\\] |
|
[^"\\\x7F\x00-\x1F] |
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
|
||||||
)* "\"" ws
|
)* "\"" ws
|
||||||
|
|
||||||
|
|||||||
@@ -3,12 +3,15 @@ package parser
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"log/slog"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode/utf16"
|
||||||
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
type File struct {
|
type File struct {
|
||||||
@@ -69,33 +72,31 @@ func ParseFile(r io.Reader) (*File, error) {
|
|||||||
var b bytes.Buffer
|
var b bytes.Buffer
|
||||||
var role string
|
var role string
|
||||||
|
|
||||||
var lineCount int
|
|
||||||
var linePos int
|
|
||||||
|
|
||||||
var utf16 bool
|
|
||||||
|
|
||||||
var f File
|
var f File
|
||||||
|
|
||||||
br := bufio.NewReader(r)
|
br := bufio.NewReader(r)
|
||||||
for {
|
|
||||||
r, _, err := br.ReadRune()
|
var sc scannerDecoder = utf8ScannerDecoder{}
|
||||||
if errors.Is(err, io.EOF) {
|
if bom, err := br.Peek(2); err != nil {
|
||||||
break
|
slog.Warn("error reading byte-order mark", "error", err)
|
||||||
} else if err != nil {
|
} else if bytes.Equal(bom, []byte{0xFE, 0xFF}) {
|
||||||
|
sc = utf16ScannerDecoder{binary.LittleEndian}
|
||||||
|
//nolint:errcheck
|
||||||
|
br.Discard(2)
|
||||||
|
} else if bytes.Equal(bom, []byte{0xFF, 0xFE}) {
|
||||||
|
sc = utf16ScannerDecoder{binary.BigEndian}
|
||||||
|
//nolint:errcheck
|
||||||
|
br.Discard(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(br)
|
||||||
|
scanner.Split(sc.ScanBytes)
|
||||||
|
for scanner.Scan() {
|
||||||
|
r, err := sc.DecodeRune(scanner.Bytes())
|
||||||
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// the utf16 byte order mark will be read as "unreadable" by ReadRune()
|
|
||||||
if isUnreadable(r) && lineCount == 0 && linePos == 0 {
|
|
||||||
utf16 = true
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// skip the second byte if we're reading utf16
|
|
||||||
if utf16 && r == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
next, r, err := parseRuneForState(r, curr)
|
next, r, err := parseRuneForState(r, curr)
|
||||||
if errors.Is(err, io.ErrUnexpectedEOF) {
|
if errors.Is(err, io.ErrUnexpectedEOF) {
|
||||||
return nil, fmt.Errorf("%w: %s", err, b.String())
|
return nil, fmt.Errorf("%w: %s", err, b.String())
|
||||||
@@ -103,13 +104,6 @@ func ParseFile(r io.Reader) (*File, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if isNewline(r) {
|
|
||||||
lineCount++
|
|
||||||
linePos = 0
|
|
||||||
} else {
|
|
||||||
linePos++
|
|
||||||
}
|
|
||||||
|
|
||||||
// process the state transition, some transitions need to be intercepted and redirected
|
// process the state transition, some transitions need to be intercepted and redirected
|
||||||
if next != curr {
|
if next != curr {
|
||||||
switch curr {
|
switch curr {
|
||||||
@@ -309,10 +303,6 @@ func isNewline(r rune) bool {
|
|||||||
return r == '\r' || r == '\n'
|
return r == '\r' || r == '\n'
|
||||||
}
|
}
|
||||||
|
|
||||||
func isUnreadable(r rune) bool {
|
|
||||||
return r == unicode.ReplacementChar
|
|
||||||
}
|
|
||||||
|
|
||||||
func isValidMessageRole(role string) bool {
|
func isValidMessageRole(role string) bool {
|
||||||
return role == "system" || role == "user" || role == "assistant"
|
return role == "system" || role == "user" || role == "assistant"
|
||||||
}
|
}
|
||||||
@@ -325,3 +315,39 @@ func isValidCommand(cmd string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type scannerDecoder interface {
|
||||||
|
ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error)
|
||||||
|
DecodeRune([]byte) (rune, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
type utf8ScannerDecoder struct{}
|
||||||
|
|
||||||
|
func (utf8ScannerDecoder) ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||||
|
return scanBytesN(data, 1, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (utf8ScannerDecoder) DecodeRune(data []byte) (rune, error) {
|
||||||
|
r, _ := utf8.DecodeRune(data)
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type utf16ScannerDecoder struct {
|
||||||
|
binary.ByteOrder
|
||||||
|
}
|
||||||
|
|
||||||
|
func (utf16ScannerDecoder) ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||||
|
return scanBytesN(data, 2, atEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e utf16ScannerDecoder) DecodeRune(data []byte) (rune, error) {
|
||||||
|
return utf16.Decode([]uint16{e.ByteOrder.Uint16(data)})[0], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func scanBytesN(data []byte, n int, atEOF bool) (int, []byte, error) {
|
||||||
|
if atEOF && len(data) == 0 {
|
||||||
|
return 0, nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return n, data[:n], nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -28,7 +28,6 @@ import (
|
|||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
"github.com/ollama/ollama/parser"
|
"github.com/ollama/ollama/parser"
|
||||||
"github.com/ollama/ollama/templates"
|
|
||||||
"github.com/ollama/ollama/types/errtypes"
|
"github.com/ollama/ollama/types/errtypes"
|
||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
@@ -333,7 +332,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
|||||||
|
|
||||||
switch c.Name {
|
switch c.Name {
|
||||||
case "model", "adapter":
|
case "model", "adapter":
|
||||||
var baseLayers []*layerWithGGML
|
var baseLayers []*layerGGML
|
||||||
if name := model.ParseName(c.Args); name.IsValid() {
|
if name := model.ParseName(c.Args); name.IsValid() {
|
||||||
baseLayers, err = parseFromModel(ctx, name, fn)
|
baseLayers, err = parseFromModel(ctx, name, fn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -435,20 +434,6 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
|||||||
config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
|
config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
|
||||||
config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType().String())
|
config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType().String())
|
||||||
config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
|
config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
|
||||||
|
|
||||||
if s := baseLayer.GGML.KV().ChatTemplate(); s != "" {
|
|
||||||
if t, err := templates.NamedTemplate(s); err != nil {
|
|
||||||
slog.Debug("template detection", "error", err)
|
|
||||||
} else {
|
|
||||||
layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
|
|
||||||
layers = append(layers, layer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
layers = append(layers, baseLayer.Layer)
|
layers = append(layers, baseLayer.Layer)
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package server
|
|||||||
import (
|
import (
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
@@ -43,7 +44,9 @@ func (m *Manifest) Remove() error {
|
|||||||
|
|
||||||
func (m *Manifest) RemoveLayers() error {
|
func (m *Manifest) RemoveLayers() error {
|
||||||
for _, layer := range append(m.Layers, m.Config) {
|
for _, layer := range append(m.Layers, m.Config) {
|
||||||
if err := layer.Remove(); err != nil {
|
if err := layer.Remove(); errors.Is(err, os.ErrNotExist) {
|
||||||
|
slog.Debug("layer does not exist", "digest", layer.Digest)
|
||||||
|
} else if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -14,17 +15,18 @@ import (
|
|||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/convert"
|
"github.com/ollama/ollama/convert"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
|
"github.com/ollama/ollama/templates"
|
||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
var intermediateBlobs map[string]string = make(map[string]string)
|
var intermediateBlobs map[string]string = make(map[string]string)
|
||||||
|
|
||||||
type layerWithGGML struct {
|
type layerGGML struct {
|
||||||
*Layer
|
*Layer
|
||||||
*llm.GGML
|
*llm.GGML
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
|
func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||||
m, err := ParseNamedManifest(name)
|
m, err := ParseNamedManifest(name)
|
||||||
switch {
|
switch {
|
||||||
case errors.Is(err, os.ErrNotExist):
|
case errors.Is(err, os.ErrNotExist):
|
||||||
@@ -66,16 +68,16 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
layers = append(layers, &layerWithGGML{layer, ggml})
|
layers = append(layers, &layerGGML{layer, ggml})
|
||||||
default:
|
default:
|
||||||
layers = append(layers, &layerWithGGML{layer, nil})
|
layers = append(layers, &layerGGML{layer, nil})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return layers, nil
|
return layers, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
|
func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||||
stat, err := file.Stat()
|
stat, err := file.Stat()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -179,13 +181,13 @@ func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(a
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
layers = append(layers, &layerWithGGML{layer, ggml})
|
layers = append(layers, &layerGGML{layer, ggml})
|
||||||
|
|
||||||
intermediateBlobs[digest] = layer.Digest
|
intermediateBlobs[digest] = layer.Digest
|
||||||
return layers, nil
|
return detectChatTemplate(layers)
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) {
|
func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||||
sr := io.NewSectionReader(file, 0, 512)
|
sr := io.NewSectionReader(file, 0, 512)
|
||||||
contentType, err := detectContentType(sr)
|
contentType, err := detectContentType(sr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -227,10 +229,30 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
layers = append(layers, &layerWithGGML{layer, ggml})
|
layers = append(layers, &layerGGML{layer, ggml})
|
||||||
offset = n
|
offset = n
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return detectChatTemplate(layers)
|
||||||
|
}
|
||||||
|
|
||||||
|
func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
|
||||||
|
for _, layer := range layers {
|
||||||
|
if s := layer.GGML.KV().ChatTemplate(); s != "" {
|
||||||
|
if t, err := templates.NamedTemplate(s); err != nil {
|
||||||
|
slog.Debug("template detection", "error", err)
|
||||||
|
} else {
|
||||||
|
tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
|
||||||
|
layers = append(layers, &layerGGML{tmpl, nil})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return layers, nil
|
return layers, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -535,7 +535,7 @@ func TestCreateDetectTemplate(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
|
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
|
||||||
filepath.Join(p, "blobs", "sha256-06cd2687a518d624073f125f1db1c5c727f77c75e84a138fe745186dbbbb4cd7"),
|
filepath.Join(p, "blobs", "sha256-2f8e594e6f34b1b4d36a246628eeb3365ce442303d656f1fcc69e821722acea0"),
|
||||||
filepath.Join(p, "blobs", "sha256-542b217f179c7825eeb5bca3c77d2b75ed05bafbd3451d9188891a60a85337c6"),
|
filepath.Join(p, "blobs", "sha256-542b217f179c7825eeb5bca3c77d2b75ed05bafbd3451d9188891a60a85337c6"),
|
||||||
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
|
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,12 +1,15 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/types/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestDelete(t *testing.T) {
|
func TestDelete(t *testing.T) {
|
||||||
@@ -69,3 +72,33 @@ func TestDelete(t *testing.T) {
|
|||||||
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{})
|
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{})
|
||||||
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{})
|
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDeleteDuplicateLayers(t *testing.T) {
|
||||||
|
p := t.TempDir()
|
||||||
|
t.Setenv("OLLAMA_MODELS", p)
|
||||||
|
var s Server
|
||||||
|
|
||||||
|
n := model.ParseName("test")
|
||||||
|
|
||||||
|
var b bytes.Buffer
|
||||||
|
if err := json.NewEncoder(&b).Encode(&ConfigV2{}); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
config, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// create a manifest with duplicate layers
|
||||||
|
if err := WriteManifest(n, config, []*Layer{config}); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
w := createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"})
|
||||||
|
if w.Code != http.StatusOK {
|
||||||
|
t.Errorf("expected status code 200, actual %d", w.Code)
|
||||||
|
}
|
||||||
|
|
||||||
|
checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{})
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user