div(cparams.n_seq_max, 0.0f);
-+ for (uint32_t i = 0; i < cparams.n_seq_max; ++i) {
- const uint64_t s = sum[i];
- if (s > 0) {
- div[i] = 1.0f/float(s);
-@@ -13879,14 +13876,11 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
- GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_cls->buffer));
-
- uint32_t * data = (uint32_t *) lctx.inp_cls->data;
-- memset(lctx.inp_cls->data, 0, n_tokens * ggml_element_size(lctx.inp_cls));
-+ memset(lctx.inp_cls->data, 0, cparams.n_seq_max * ggml_element_size(lctx.inp_cls));
-
- for (int i = 0; i < n_tokens; ++i) {
- const llama_seq_id seq_id = batch.seq_id[i][0];
- const llama_pos pos = batch.pos[i];
--
-- GGML_ASSERT(seq_id < n_tokens && "seq_id cannot be larger than n_tokens with pooling_type == CLS");
--
- if (pos == 0) {
- data[seq_id] = i;
- }
diff --git a/llm/server.go b/llm/server.go
index 9347a458..c38bc6bb 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -258,7 +258,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--mlock")
}
- if gpu.IsNUMA() {
+ if gpu.IsNUMA() && gpus[0].Library == "cpu" {
numaMode := "distribute"
if runtime.GOOS == "linux" {
if _, err := exec.LookPath("numactl"); err == nil {
@@ -409,7 +409,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
}
if err = s.cmd.Start(); err != nil {
- // Detect permission denied and augment them essage about noexec
+ // Detect permission denied and augment the message about noexec
if errors.Is(err, os.ErrPermission) {
finalErr = fmt.Errorf("unable to start server %w. %s may have noexec set. Set OLLAMA_TMPDIR for server to a writable executable directory", err, dir)
continue
diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1
index 9cebf1f4..eb8570c8 100644
--- a/scripts/build_windows.ps1
+++ b/scripts/build_windows.ps1
@@ -122,8 +122,8 @@ function buildOllama() {
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
- New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\bin\ -Force
- cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\bin\
+ New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force
+ cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\
}
function buildApp() {
diff --git a/server/images.go b/server/images.go
index 8b3a67cf..b5bf7ad6 100644
--- a/server/images.go
+++ b/server/images.go
@@ -369,13 +369,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
parameters := make(map[string]any)
var layers []Layer
+ var baseLayers []*layerGGML
for _, c := range modelfile.Commands {
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
+ command := c.Name
- switch c.Name {
+ switch command {
case "model", "adapter":
- var baseLayers []*layerGGML
- if name := model.ParseName(c.Args); name.IsValid() {
+ if name := model.ParseName(c.Args); name.IsValid() && command == "model" {
baseLayers, err = parseFromModel(ctx, name, fn)
if err != nil {
return err
@@ -409,14 +410,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
}
defer blob.Close()
- baseLayers, err = parseFromFile(ctx, blob, digest, fn)
+ baseLayers, err = parseFromFile(ctx, command, baseLayers, blob, digest, fn)
if err != nil {
return err
}
} else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil {
defer file.Close()
- baseLayers, err = parseFromFile(ctx, file, "", fn)
+ baseLayers, err = parseFromFile(ctx, command, baseLayers, file, "", fn)
if err != nil {
return err
}
diff --git a/server/layer.go b/server/layer.go
index c666bd10..0bdee72b 100644
--- a/server/layer.go
+++ b/server/layer.go
@@ -51,6 +51,9 @@ func NewLayer(r io.Reader, mediatype string) (Layer, error) {
if err := os.Rename(temp.Name(), blob); err != nil {
return Layer{}, err
}
+ if err := os.Chmod(blob, 0o644); err != nil {
+ return Layer{}, err
+ }
}
return Layer{
diff --git a/server/model.go b/server/model.go
index b17bf0e3..55fb2d8d 100644
--- a/server/model.go
+++ b/server/model.go
@@ -81,7 +81,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
return layers, nil
}
-func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
+func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
fi, err := f.Stat()
if err != nil {
return nil, err
@@ -108,16 +108,38 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
defer t.Close()
defer os.Remove(t.Name())
- fn(api.ProgressResponse{Status: "converting model"})
- if err := convert.Convert(convert.NewZipReader(r, p, 32<<20), t); err != nil {
- return nil, err
+ var layerType string
+
+ switch command {
+ case "adapter":
+ var baseModel *llm.GGML
+ for _, l := range baseLayers {
+ if l.GGML != nil {
+ baseModel = l.GGML
+ break
+ }
+ }
+
+ if baseModel == nil {
+ return nil, fmt.Errorf("no base model specified for the adapter")
+ }
+
+ if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil {
+ return nil, err
+ }
+ layerType = "application/vnd.ollama.image.adapter"
+ case "model":
+ if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil {
+ return nil, err
+ }
+ layerType = "application/vnd.ollama.image.model"
}
if _, err := t.Seek(0, io.SeekStart); err != nil {
return nil, err
}
- layer, err := NewLayer(t, "application/vnd.ollama.image.model")
+ layer, err := NewLayer(t, layerType)
if err != nil {
return nil, err
}
@@ -139,7 +161,7 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
return detectChatTemplate(layers)
}
-func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
+func parseFromFile(ctx context.Context, command string, baseLayers []*layerGGML, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
sr := io.NewSectionReader(file, 0, 512)
contentType, err := detectContentType(sr)
if err != nil {
@@ -150,7 +172,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
case "gguf", "ggla":
// noop
case "application/zip":
- return parseFromZipFile(ctx, file, digest, fn)
+ return parseFromZipFile(ctx, command, baseLayers, file, digest, fn)
default:
return nil, fmt.Errorf("unsupported content type: %s", contentType)
}
@@ -170,7 +192,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
}
mediatype := "application/vnd.ollama.image.model"
- if ggml.Name() == "ggla" {
+ if ggml.Name() == "ggla" || ggml.KV().Kind() == "adapter" {
mediatype = "application/vnd.ollama.image.adapter"
} else if ggml.KV().Architecture() == "clip" {
mediatype = "application/vnd.ollama.image.projector"
diff --git a/server/model_test.go b/server/model_test.go
index 63fc408d..7753c549 100644
--- a/server/model_test.go
+++ b/server/model_test.go
@@ -153,7 +153,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
t.Fatalf("failed to seek to start: %v", err)
}
- layers, err := parseFromFile(context.Background(), file, "", func(api.ProgressResponse) {})
+ layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, "", func(api.ProgressResponse) {})
if err != nil {
t.Fatalf("failed to parse from file: %v", err)
}
@@ -166,7 +166,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
t.Fatalf("failed to seek to start: %v", err)
}
- layers2, err := parseFromFile(context.Background(), file, layers[0].Digest, func(api.ProgressResponse) {})
+ layers2, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, layers[0].Digest, func(api.ProgressResponse) {})
if err != nil {
t.Fatalf("failed to parse from file: %v", err)
}
@@ -206,7 +206,7 @@ func TestParseLayerFromCopy(t *testing.T) {
t.Fatalf("failed to seek to start: %v", err)
}
- layers, err := parseFromFile(context.Background(), file2, "", func(api.ProgressResponse) {})
+ layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file2, "", func(api.ProgressResponse) {})
if err != nil {
t.Fatalf("failed to parse from file: %v", err)
}
diff --git a/server/routes.go b/server/routes.go
index 6c470c17..5e9f51e1 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -463,7 +463,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
c.JSON(http.StatusOK, resp)
}
-func (s *Server) PullModelHandler(c *gin.Context) {
+func (s *Server) PullHandler(c *gin.Context) {
var req api.PullRequest
err := c.ShouldBindJSON(&req)
switch {
@@ -513,7 +513,7 @@ func (s *Server) PullModelHandler(c *gin.Context) {
streamResponse(c, ch)
}
-func (s *Server) PushModelHandler(c *gin.Context) {
+func (s *Server) PushHandler(c *gin.Context) {
var req api.PushRequest
err := c.ShouldBindJSON(&req)
switch {
@@ -577,7 +577,7 @@ func checkNameExists(name model.Name) error {
return nil
}
-func (s *Server) CreateModelHandler(c *gin.Context) {
+func (s *Server) CreateHandler(c *gin.Context) {
var r api.CreateRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -647,7 +647,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
streamResponse(c, ch)
}
-func (s *Server) DeleteModelHandler(c *gin.Context) {
+func (s *Server) DeleteHandler(c *gin.Context) {
var r api.DeleteRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -680,7 +680,7 @@ func (s *Server) DeleteModelHandler(c *gin.Context) {
}
}
-func (s *Server) ShowModelHandler(c *gin.Context) {
+func (s *Server) ShowHandler(c *gin.Context) {
var req api.ShowRequest
err := c.ShouldBindJSON(&req)
switch {
@@ -829,7 +829,7 @@ func getKVData(digest string, verbose bool) (llm.KV, error) {
return kv, nil
}
-func (s *Server) ListModelsHandler(c *gin.Context) {
+func (s *Server) ListHandler(c *gin.Context) {
ms, err := Manifests()
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -879,7 +879,7 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
c.JSON(http.StatusOK, api.ListResponse{Models: models})
}
-func (s *Server) CopyModelHandler(c *gin.Context) {
+func (s *Server) CopyHandler(c *gin.Context) {
var r api.CopyRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -1081,33 +1081,33 @@ func (s *Server) GenerateRoutes() http.Handler {
allowedHostsMiddleware(s.addr),
)
- r.POST("/api/pull", s.PullModelHandler)
+ r.POST("/api/pull", s.PullHandler)
r.POST("/api/generate", s.GenerateHandler)
r.POST("/api/chat", s.ChatHandler)
r.POST("/api/embed", s.EmbedHandler)
r.POST("/api/embeddings", s.EmbeddingsHandler)
- r.POST("/api/create", s.CreateModelHandler)
- r.POST("/api/push", s.PushModelHandler)
- r.POST("/api/copy", s.CopyModelHandler)
- r.DELETE("/api/delete", s.DeleteModelHandler)
- r.POST("/api/show", s.ShowModelHandler)
+ r.POST("/api/create", s.CreateHandler)
+ r.POST("/api/push", s.PushHandler)
+ r.POST("/api/copy", s.CopyHandler)
+ r.DELETE("/api/delete", s.DeleteHandler)
+ r.POST("/api/show", s.ShowHandler)
r.POST("/api/blobs/:digest", s.CreateBlobHandler)
r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
- r.GET("/api/ps", s.ProcessHandler)
+ r.GET("/api/ps", s.PsHandler)
// Compatibility endpoints
r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
r.POST("/v1/embeddings", openai.EmbeddingsMiddleware(), s.EmbedHandler)
- r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler)
- r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler)
+ r.GET("/v1/models", openai.ListMiddleware(), s.ListHandler)
+ r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowHandler)
for _, method := range []string{http.MethodGet, http.MethodHead} {
r.Handle(method, "/", func(c *gin.Context) {
c.String(http.StatusOK, "Ollama is running")
})
- r.Handle(method, "/api/tags", s.ListModelsHandler)
+ r.Handle(method, "/api/tags", s.ListHandler)
r.Handle(method, "/api/version", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"version": version.Version})
})
@@ -1269,7 +1269,7 @@ func streamResponse(c *gin.Context, ch chan any) {
})
}
-func (s *Server) ProcessHandler(c *gin.Context) {
+func (s *Server) PsHandler(c *gin.Context) {
models := []api.ProcessModelResponse{}
for _, v := range s.sched.loaded {
diff --git a/server/routes_create_test.go b/server/routes_create_test.go
index 4de07b25..d436f26c 100644
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -93,7 +93,7 @@ func TestCreateFromBin(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -120,7 +120,7 @@ func TestCreateFromModel(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -134,7 +134,7 @@ func TestCreateFromModel(t *testing.T) {
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test",
Stream: &stream,
@@ -162,7 +162,7 @@ func TestCreateRemovesLayers(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -182,7 +182,7 @@ func TestCreateRemovesLayers(t *testing.T) {
filepath.Join(p, "blobs", "sha256-bc80b03733773e0728011b2f4adf34c458b400e1aad48cb28d61170f3a2ad2d6"),
})
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -210,7 +210,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -230,7 +230,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
filepath.Join(p, "blobs", "sha256-f29e82a8284dbdf5910b1555580ff60b04238b8da9d5e51159ada67a4d0d5851"),
})
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -267,7 +267,7 @@ func TestCreateMergeParameters(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -288,7 +288,7 @@ func TestCreateMergeParameters(t *testing.T) {
})
// in order to merge parameters, the second model must be created FROM the first
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test\nPARAMETER temperature 0.6\nPARAMETER top_p 0.7",
Stream: &stream,
@@ -326,7 +326,7 @@ func TestCreateMergeParameters(t *testing.T) {
}
// slices are replaced
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test\nPARAMETER temperature 0.6\nPARAMETER top_p 0.7\nPARAMETER stop <|endoftext|>",
Stream: &stream,
@@ -371,7 +371,7 @@ func TestCreateReplacesMessages(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -391,7 +391,7 @@ func TestCreateReplacesMessages(t *testing.T) {
filepath.Join(p, "blobs", "sha256-e0e27d47045063ccb167ae852c51d49a98eab33fabaee4633fdddf97213e40b5"),
})
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test\nMESSAGE assistant \"You're a test, Harry.\"\nMESSAGE user \"I-I'm a what?\"\nMESSAGE assistant \"A test. And a thumping good one at that, I'd wager.\"",
Stream: &stream,
@@ -448,7 +448,7 @@ func TestCreateTemplateSystem(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -488,7 +488,7 @@ func TestCreateTemplateSystem(t *testing.T) {
}
t.Run("incomplete template", func(t *testing.T) {
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -500,7 +500,7 @@ func TestCreateTemplateSystem(t *testing.T) {
})
t.Run("template with unclosed if", func(t *testing.T) {
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ if .Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -512,7 +512,7 @@ func TestCreateTemplateSystem(t *testing.T) {
})
t.Run("template with undefined function", func(t *testing.T) {
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -531,7 +531,7 @@ func TestCreateLicenses(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -579,7 +579,7 @@ func TestCreateDetectTemplate(t *testing.T) {
var s Server
t.Run("matched", func(t *testing.T) {
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
"tokenizer.chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
@@ -600,7 +600,7 @@ func TestCreateDetectTemplate(t *testing.T) {
})
t.Run("unmatched", func(t *testing.T) {
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
diff --git a/server/routes_delete_test.go b/server/routes_delete_test.go
index 82fac9f5..5a337e79 100644
--- a/server/routes_delete_test.go
+++ b/server/routes_delete_test.go
@@ -22,7 +22,7 @@ func TestDelete(t *testing.T) {
var s Server
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
})
@@ -31,7 +31,7 @@ func TestDelete(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
})
@@ -52,7 +52,7 @@ func TestDelete(t *testing.T) {
filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"),
})
- w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"})
+ w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test"})
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
@@ -68,7 +68,7 @@ func TestDelete(t *testing.T) {
filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"),
})
- w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test2"})
+ w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test2"})
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
@@ -102,7 +102,7 @@ func TestDeleteDuplicateLayers(t *testing.T) {
t.Fatal(err)
}
- w := createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"})
+ w := createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test"})
if w.Code != http.StatusOK {
t.Errorf("expected status code 200, actual %d", w.Code)
}
diff --git a/server/routes_generate_test.go b/server/routes_generate_test.go
index 5c0caff1..480b9672 100644
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -84,7 +84,7 @@ func TestGenerateChat(t *testing.T) {
go s.sched.Run(context.TODO())
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test",
Modelfile: fmt.Sprintf(`FROM %s
TEMPLATE """
@@ -144,7 +144,7 @@ func TestGenerateChat(t *testing.T) {
})
t.Run("missing capabilities chat", func(t *testing.T) {
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "bert",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
"general.architecture": "bert",
@@ -270,7 +270,7 @@ func TestGenerateChat(t *testing.T) {
checkChatResponse(t, w.Body, "test", "Hi!")
})
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test-system",
Modelfile: "FROM test\nSYSTEM You are a helpful assistant.",
})
@@ -382,7 +382,7 @@ func TestGenerate(t *testing.T) {
go s.sched.Run(context.TODO())
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test",
Modelfile: fmt.Sprintf(`FROM %s
TEMPLATE """
@@ -442,7 +442,7 @@ func TestGenerate(t *testing.T) {
})
t.Run("missing capabilities generate", func(t *testing.T) {
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "bert",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
"general.architecture": "bert",
@@ -583,7 +583,7 @@ func TestGenerate(t *testing.T) {
checkGenerateResponse(t, w.Body, "test", "Hi!")
})
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test-system",
Modelfile: "FROM test\nSYSTEM You are a helpful assistant.",
})
@@ -652,7 +652,7 @@ func TestGenerate(t *testing.T) {
checkGenerateResponse(t, w.Body, "test-system", "Abra kadabra!")
})
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test-suffix",
Modelfile: `FROM test
TEMPLATE """{{- if .Suffix }} {{ .Prompt }} {{ .Suffix }}
diff --git a/server/routes_list_test.go b/server/routes_list_test.go
index 6e92b7a1..56b40830 100644
--- a/server/routes_list_test.go
+++ b/server/routes_list_test.go
@@ -31,13 +31,13 @@ func TestList(t *testing.T) {
var s Server
for _, n := range expectNames {
- createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ createRequest(t, s.CreateHandler, api.CreateRequest{
Name: n,
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
})
}
- w := createRequest(t, s.ListModelsHandler, nil)
+ w := createRequest(t, s.ListHandler, nil)
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
diff --git a/server/routes_test.go b/server/routes_test.go
index 242875d6..bffcea20 100644
--- a/server/routes_test.go
+++ b/server/routes_test.go
@@ -318,7 +318,7 @@ func TestCase(t *testing.T) {
var s Server
for _, tt := range cases {
t.Run(tt, func(t *testing.T) {
- w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: tt,
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -334,7 +334,7 @@ func TestCase(t *testing.T) {
}
t.Run("create", func(t *testing.T) {
- w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: strings.ToUpper(tt),
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -350,7 +350,7 @@ func TestCase(t *testing.T) {
})
t.Run("pull", func(t *testing.T) {
- w := createRequest(t, s.PullModelHandler, api.PullRequest{
+ w := createRequest(t, s.PullHandler, api.PullRequest{
Name: strings.ToUpper(tt),
Stream: &stream,
})
@@ -365,7 +365,7 @@ func TestCase(t *testing.T) {
})
t.Run("copy", func(t *testing.T) {
- w := createRequest(t, s.CopyModelHandler, api.CopyRequest{
+ w := createRequest(t, s.CopyHandler, api.CopyRequest{
Source: tt,
Destination: strings.ToUpper(tt),
})
@@ -387,7 +387,7 @@ func TestShow(t *testing.T) {
var s Server
- createRequest(t, s.CreateModelHandler, api.CreateRequest{
+ createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "show-model",
Modelfile: fmt.Sprintf(
"FROM %s\nFROM %s",
@@ -396,7 +396,7 @@ func TestShow(t *testing.T) {
),
})
- w := createRequest(t, s.ShowModelHandler, api.ShowRequest{
+ w := createRequest(t, s.ShowHandler, api.ShowRequest{
Name: "show-model",
})
diff --git a/server/sched.go b/server/sched.go
index 9d8c4144..58071bf0 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -193,6 +193,11 @@ func (s *Scheduler) processPending(ctx context.Context) {
break
}
+ // Embedding models should always be loaded with parallel=1
+ if pending.model.CheckCapabilities(CapabilityCompletion) != nil {
+ numParallel = 1
+ }
+
// Evaluate if the model will fit in the available system memory, or if we should unload a model first
if len(gpus) == 1 && gpus[0].Library == "cpu" {
// simplifying assumption of defaultParallel when in CPU mode
diff --git a/server/sched_test.go b/server/sched_test.go
index 713b9259..fb049574 100644
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -117,7 +117,6 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
require.NoError(t, llm.WriteGGUF(f, llm.KV{
"general.architecture": "llama",
- "general.name": "name",
"llama.context_length": uint32(32),
"llama.embedding_length": uint32(4096),
"llama.block_count": uint32(1),