From 3f0b309ad4c49c0d87839e50fe6a46163902aba0 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 10 Jun 2024 08:47:13 -0700 Subject: [PATCH 01/11] remove ManifestV2 --- server/images.go | 17 +++++------------ server/manifest.go | 20 +++++++++++--------- server/manifest_test.go | 2 +- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/server/images.go b/server/images.go index e949fb18..447a63a6 100644 --- a/server/images.go +++ b/server/images.go @@ -135,13 +135,6 @@ type Message struct { Content string `json:"content"` } -type ManifestV2 struct { - SchemaVersion int `json:"schemaVersion"` - MediaType string `json:"mediaType"` - Config *Layer `json:"config"` - Layers []*Layer `json:"layers"` -} - type ConfigV2 struct { ModelFormat string `json:"model_format"` ModelFamily string `json:"model_family"` @@ -160,7 +153,7 @@ type RootFS struct { DiffIDs []string `json:"diff_ids"` } -func GetManifest(mp ModelPath) (*ManifestV2, string, error) { +func GetManifest(mp ModelPath) (*Manifest, string, error) { fp, err := mp.GetManifestPath() if err != nil { return nil, "", err @@ -170,7 +163,7 @@ func GetManifest(mp ModelPath) (*ManifestV2, string, error) { return nil, "", err } - var manifest *ManifestV2 + var manifest *Manifest bts, err := os.ReadFile(fp) if err != nil { @@ -822,7 +815,7 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error { mp := ParseModelPath(name) - var manifest *ManifestV2 + var manifest *Manifest var err error var noprune string @@ -929,7 +922,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu return nil } -func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptions) (*ManifestV2, error) { +func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptions) (*Manifest, error) { requestURL := mp.BaseURL().JoinPath("v2", 
mp.GetNamespaceRepository(), "manifests", mp.Tag) headers := make(http.Header) @@ -940,7 +933,7 @@ func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptio } defer resp.Body.Close() - var m *ManifestV2 + var m *Manifest if err := json.NewDecoder(resp.Body).Decode(&m); err != nil { return nil, err } diff --git a/server/manifest.go b/server/manifest.go index 61dd1ab4..726bb48d 100644 --- a/server/manifest.go +++ b/server/manifest.go @@ -14,7 +14,10 @@ import ( ) type Manifest struct { - ManifestV2 + SchemaVersion int `json:"schemaVersion"` + MediaType string `json:"mediaType"` + Config *Layer `json:"config"` + Layers []*Layer `json:"layers"` filepath string fi os.FileInfo @@ -66,7 +69,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) { p := filepath.Join(manifests, n.Filepath()) - var m ManifestV2 + var m Manifest f, err := os.Open(p) if err != nil { return nil, err @@ -83,12 +86,11 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) { return nil, err } - return &Manifest{ - ManifestV2: m, - filepath: p, - fi: fi, - digest: fmt.Sprintf("%x", sha256sum.Sum(nil)), - }, nil + m.filepath = p + m.fi = fi + m.digest = fmt.Sprintf("%x", sha256sum.Sum(nil)) + + return &m, nil } func WriteManifest(name model.Name, config *Layer, layers []*Layer) error { @@ -108,7 +110,7 @@ func WriteManifest(name model.Name, config *Layer, layers []*Layer) error { } defer f.Close() - m := ManifestV2{ + m := Manifest{ SchemaVersion: 2, MediaType: "application/vnd.docker.distribution.manifest.v2+json", Config: config, diff --git a/server/manifest_test.go b/server/manifest_test.go index ceee31d8..ca6c3d2e 100644 --- a/server/manifest_test.go +++ b/server/manifest_test.go @@ -25,7 +25,7 @@ func createManifest(t *testing.T, path, name string) { } defer f.Close() - if err := json.NewEncoder(f).Encode(ManifestV2{}); err != nil { + if err := json.NewEncoder(f).Encode(Manifest{}); err != nil { t.Fatal(err) } } From 58e3fff311f9e7abec20cdfe20fa43958e447aeb 
Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 10 Jun 2024 14:54:42 -0700 Subject: [PATCH 02/11] rename templates to template --- server/images.go | 26 ++- server/model.go | 4 +- server/prompt.go | 18 +- server/prompt_test.go | 15 +- server/routes.go | 26 ++- {templates => template}/alfred.gotmpl | 0 {templates => template}/alpaca.gotmpl | 0 {templates => template}/chatml.gotmpl | 0 {templates => template}/chatqa.gotmpl | 0 .../codellama-70b-instruct.gotmpl | 0 .../falcon-instruct.gotmpl | 0 {templates => template}/gemma-instruct.gotmpl | 0 .../granite-instruct.gotmpl | 0 {templates => template}/index.json | 0 {templates => template}/llama2-chat.gotmpl | 0 .../llama3-instruct.gotmpl | 0 {templates => template}/magicoder.gotmpl | 0 .../mistral-instruct.gotmpl | 0 {templates => template}/openchat.gotmpl | 0 {templates => template}/phi-3.gotmpl | 0 {templates => template}/solar-instruct.gotmpl | 0 .../starcoder2-instruct.gotmpl | 0 template/template.go | 158 ++++++++++++++++++ template/template_test.go | 89 ++++++++++ .../testdata/templates.jsonl | 0 {templates => template}/vicuna.gotmpl | 0 {templates => template}/zephyr.gotmpl | 0 templates/template.go | 70 -------- templates/template_test.go | 59 ------- 29 files changed, 301 insertions(+), 164 deletions(-) rename {templates => template}/alfred.gotmpl (100%) rename {templates => template}/alpaca.gotmpl (100%) rename {templates => template}/chatml.gotmpl (100%) rename {templates => template}/chatqa.gotmpl (100%) rename {templates => template}/codellama-70b-instruct.gotmpl (100%) rename {templates => template}/falcon-instruct.gotmpl (100%) rename {templates => template}/gemma-instruct.gotmpl (100%) rename {templates => template}/granite-instruct.gotmpl (100%) rename {templates => template}/index.json (100%) rename {templates => template}/llama2-chat.gotmpl (100%) rename {templates => template}/llama3-instruct.gotmpl (100%) rename {templates => template}/magicoder.gotmpl (100%) rename {templates => 
template}/mistral-instruct.gotmpl (100%) rename {templates => template}/openchat.gotmpl (100%) rename {templates => template}/phi-3.gotmpl (100%) rename {templates => template}/solar-instruct.gotmpl (100%) rename {templates => template}/starcoder2-instruct.gotmpl (100%) create mode 100644 template/template.go create mode 100644 template/template_test.go rename {templates => template}/testdata/templates.jsonl (100%) rename {templates => template}/vicuna.gotmpl (100%) rename {templates => template}/zephyr.gotmpl (100%) delete mode 100644 templates/template.go delete mode 100644 templates/template_test.go diff --git a/server/images.go b/server/images.go index 447a63a6..65ed51c7 100644 --- a/server/images.go +++ b/server/images.go @@ -28,6 +28,7 @@ import ( "github.com/ollama/ollama/format" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/parser" + "github.com/ollama/ollama/template" "github.com/ollama/ollama/types/errtypes" "github.com/ollama/ollama/types/model" "github.com/ollama/ollama/version" @@ -48,12 +49,13 @@ type Model struct { ParentModel string AdapterPaths []string ProjectorPaths []string - Template string System string License []string Digest string Options map[string]interface{} Messages []Message + + Template *template.Template } func (m *Model) IsEmbedding() bool { @@ -82,10 +84,10 @@ func (m *Model) String() string { }) } - if m.Template != "" { + if m.Template != nil { modelfile.Commands = append(modelfile.Commands, parser.Command{ Name: "template", - Args: m.Template, + Args: m.Template.String(), }) } @@ -191,8 +193,7 @@ func GetModel(name string) (*Model, error) { Name: mp.GetFullTagname(), ShortName: mp.GetShortTagname(), Digest: digest, - Template: "{{ .Prompt }}", - License: []string{}, + Template: template.DefaultTemplate, } filename, err := GetBlobsPath(manifest.Config.Digest) @@ -228,13 +229,17 @@ func GetModel(name string) (*Model, error) { model.AdapterPaths = append(model.AdapterPaths, filename) case 
"application/vnd.ollama.image.projector": model.ProjectorPaths = append(model.ProjectorPaths, filename) - case "application/vnd.ollama.image.template": + case "application/vnd.ollama.image.prompt", + "application/vnd.ollama.image.template": bts, err := os.ReadFile(filename) if err != nil { return nil, err } - model.Template = string(bts) + model.Template, err = template.Parse(string(bts)) + if err != nil { + return nil, err + } case "application/vnd.ollama.image.system": bts, err := os.ReadFile(filename) if err != nil { @@ -242,13 +247,6 @@ func GetModel(name string) (*Model, error) { } model.System = string(bts) - case "application/vnd.ollama.image.prompt": - bts, err := os.ReadFile(filename) - if err != nil { - return nil, err - } - - model.Template = string(bts) case "application/vnd.ollama.image.params": params, err := os.Open(filename) if err != nil { diff --git a/server/model.go b/server/model.go index d56e641b..6abb5b39 100644 --- a/server/model.go +++ b/server/model.go @@ -16,7 +16,7 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/convert" "github.com/ollama/ollama/llm" - "github.com/ollama/ollama/templates" + "github.com/ollama/ollama/template" "github.com/ollama/ollama/types/model" ) @@ -258,7 +258,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) { for _, layer := range layers { if s := layer.GGML.KV().ChatTemplate(); s != "" { - if t, err := templates.NamedTemplate(s); err != nil { + if t, err := template.Named(s); err != nil { slog.Debug("template detection", "error", err) } else { tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template") diff --git a/server/prompt.go b/server/prompt.go index 604e6971..bfc319a5 100644 --- a/server/prompt.go +++ b/server/prompt.go @@ -4,10 +4,11 @@ import ( "fmt" "log/slog" "strings" - "text/template" + "text/template/parse" "github.com/ollama/ollama/api" + 
"github.com/ollama/ollama/template" ) // isResponseNode checks if the node contains .Response @@ -53,13 +54,8 @@ func formatTemplateForResponse(tmpl *template.Template, generate bool) { // Prompt renders a prompt from a template. If generate is set to true, // the response and parts of the template following it are not rendered -func Prompt(tmpl, system, prompt, response string, generate bool) (string, error) { - parsed, err := template.New("").Option("missingkey=zero").Parse(tmpl) - if err != nil { - return "", err - } - - formatTemplateForResponse(parsed, generate) +func Prompt(tmpl *template.Template, system, prompt, response string, generate bool) (string, error) { + formatTemplateForResponse(tmpl, generate) vars := map[string]any{ "System": system, @@ -68,14 +64,14 @@ func Prompt(tmpl, system, prompt, response string, generate bool) (string, error } var sb strings.Builder - if err := parsed.Execute(&sb, vars); err != nil { + if err := tmpl.Execute(&sb, vars); err != nil { return "", err } return sb.String(), nil } -func countTokens(tmpl string, system string, prompt string, response string, encode func(string) ([]int, error)) (int, error) { +func countTokens(tmpl *template.Template, system string, prompt string, response string, encode func(string) ([]int, error)) (int, error) { rendered, err := Prompt(tmpl, system, prompt, response, false) if err != nil { return 0, err @@ -91,7 +87,7 @@ func countTokens(tmpl string, system string, prompt string, response string, enc } // ChatPrompt builds up a prompt from a series of messages, truncating based on context window size -func ChatPrompt(tmpl string, messages []api.Message, window int, encode func(string) ([]int, error)) (string, error) { +func ChatPrompt(tmpl *template.Template, messages []api.Message, window int, encode func(string) ([]int, error)) (string, error) { type prompt struct { System string Prompt string diff --git a/server/prompt_test.go b/server/prompt_test.go index a7e18a70..7df58d0b 100644 --- 
a/server/prompt_test.go +++ b/server/prompt_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/template" ) func TestPrompt(t *testing.T) { @@ -61,7 +62,12 @@ func TestPrompt(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - got, err := Prompt(tc.template, tc.system, tc.prompt, tc.response, tc.generate) + tmpl, err := template.Parse(tc.template) + if err != nil { + t.Fatal(err) + } + + got, err := Prompt(tmpl, tc.system, tc.prompt, tc.response, tc.generate) if err != nil { t.Errorf("error = %v", err) } @@ -192,7 +198,12 @@ func TestChatPrompt(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - got, err := ChatPrompt(tc.template, tc.messages, tc.window, encode) + tmpl, err := template.Parse(tc.template) + if err != nil { + t.Fatal(err) + } + + got, err := ChatPrompt(tmpl, tc.messages, tc.window, encode) if err != nil { t.Errorf("error = %v", err) } diff --git a/server/routes.go b/server/routes.go index 76ead072..d8a4a67e 100644 --- a/server/routes.go +++ b/server/routes.go @@ -31,6 +31,7 @@ import ( "github.com/ollama/ollama/llm" "github.com/ollama/ollama/openai" "github.com/ollama/ollama/parser" + "github.com/ollama/ollama/template" "github.com/ollama/ollama/types/errtypes" "github.com/ollama/ollama/types/model" "github.com/ollama/ollama/version" @@ -161,6 +162,12 @@ func (s *Server) GenerateHandler(c *gin.Context) { return } + tmpl, err := template.Parse(req.Template) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + checkpointLoaded := time.Now() var prompt string @@ -169,7 +176,11 @@ func (s *Server) GenerateHandler(c *gin.Context) { prompt = req.Prompt case req.Prompt != "": if req.Template == "" { - req.Template = model.Template + model.Template, err = template.Parse(req.Template) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } } if req.System == 
"" { @@ -187,7 +198,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { sb.WriteString(req.Prompt) - p, err := Prompt(req.Template, req.System, sb.String(), "", true) + p, err := Prompt(tmpl, req.System, sb.String(), "", true) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return @@ -242,7 +253,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { resp.LoadDuration = checkpointLoaded.Sub(checkpointStart) if !req.Raw { - p, err := Prompt(req.Template, req.System, req.Prompt, generated.String(), false) + p, err := Prompt(tmpl, req.System, req.Prompt, generated.String(), false) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return @@ -680,7 +691,10 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) { } if req.Template != "" { - m.Template = req.Template + m.Template, err = template.Parse(req.Template) + if err != nil { + return nil, err + } } msgs := make([]api.Message, 0) @@ -701,7 +715,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) { resp := &api.ShowResponse{ License: strings.Join(m.License, "\n"), System: m.System, - Template: m.Template, + Template: m.Template.String(), Details: modelDetails, Messages: msgs, ModifiedAt: manifest.fi.ModTime(), @@ -1246,7 +1260,7 @@ func (s *Server) ProcessHandler(c *gin.Context) { } // ChatPrompt builds up a prompt from a series of messages for the currently `loaded` model -func chatPrompt(ctx context.Context, runner *runnerRef, template string, messages []api.Message, numCtx int) (string, error) { +func chatPrompt(ctx context.Context, runner *runnerRef, template *template.Template, messages []api.Message, numCtx int) (string, error) { encode := func(s string) ([]int, error) { return runner.llama.Tokenize(ctx, s) } diff --git a/templates/alfred.gotmpl b/template/alfred.gotmpl similarity index 100% rename from templates/alfred.gotmpl rename to template/alfred.gotmpl diff --git a/templates/alpaca.gotmpl 
b/template/alpaca.gotmpl similarity index 100% rename from templates/alpaca.gotmpl rename to template/alpaca.gotmpl diff --git a/templates/chatml.gotmpl b/template/chatml.gotmpl similarity index 100% rename from templates/chatml.gotmpl rename to template/chatml.gotmpl diff --git a/templates/chatqa.gotmpl b/template/chatqa.gotmpl similarity index 100% rename from templates/chatqa.gotmpl rename to template/chatqa.gotmpl diff --git a/templates/codellama-70b-instruct.gotmpl b/template/codellama-70b-instruct.gotmpl similarity index 100% rename from templates/codellama-70b-instruct.gotmpl rename to template/codellama-70b-instruct.gotmpl diff --git a/templates/falcon-instruct.gotmpl b/template/falcon-instruct.gotmpl similarity index 100% rename from templates/falcon-instruct.gotmpl rename to template/falcon-instruct.gotmpl diff --git a/templates/gemma-instruct.gotmpl b/template/gemma-instruct.gotmpl similarity index 100% rename from templates/gemma-instruct.gotmpl rename to template/gemma-instruct.gotmpl diff --git a/templates/granite-instruct.gotmpl b/template/granite-instruct.gotmpl similarity index 100% rename from templates/granite-instruct.gotmpl rename to template/granite-instruct.gotmpl diff --git a/templates/index.json b/template/index.json similarity index 100% rename from templates/index.json rename to template/index.json diff --git a/templates/llama2-chat.gotmpl b/template/llama2-chat.gotmpl similarity index 100% rename from templates/llama2-chat.gotmpl rename to template/llama2-chat.gotmpl diff --git a/templates/llama3-instruct.gotmpl b/template/llama3-instruct.gotmpl similarity index 100% rename from templates/llama3-instruct.gotmpl rename to template/llama3-instruct.gotmpl diff --git a/templates/magicoder.gotmpl b/template/magicoder.gotmpl similarity index 100% rename from templates/magicoder.gotmpl rename to template/magicoder.gotmpl diff --git a/templates/mistral-instruct.gotmpl b/template/mistral-instruct.gotmpl similarity index 100% rename from 
templates/mistral-instruct.gotmpl rename to template/mistral-instruct.gotmpl diff --git a/templates/openchat.gotmpl b/template/openchat.gotmpl similarity index 100% rename from templates/openchat.gotmpl rename to template/openchat.gotmpl diff --git a/templates/phi-3.gotmpl b/template/phi-3.gotmpl similarity index 100% rename from templates/phi-3.gotmpl rename to template/phi-3.gotmpl diff --git a/templates/solar-instruct.gotmpl b/template/solar-instruct.gotmpl similarity index 100% rename from templates/solar-instruct.gotmpl rename to template/solar-instruct.gotmpl diff --git a/templates/starcoder2-instruct.gotmpl b/template/starcoder2-instruct.gotmpl similarity index 100% rename from templates/starcoder2-instruct.gotmpl rename to template/starcoder2-instruct.gotmpl diff --git a/template/template.go b/template/template.go new file mode 100644 index 00000000..d15f7156 --- /dev/null +++ b/template/template.go @@ -0,0 +1,158 @@ +package template + +import ( + "bytes" + "embed" + "encoding/json" + "errors" + "io" + "math" + "slices" + "strings" + "sync" + "text/template" + "text/template/parse" + + "github.com/agnivade/levenshtein" + "golang.org/x/exp/maps" +) + +//go:embed index.json +var indexBytes []byte + +//go:embed *.gotmpl +var templatesFS embed.FS + +var templatesOnce = sync.OnceValues(func() ([]*named, error) { + var templates []*named + if err := json.Unmarshal(indexBytes, &templates); err != nil { + return nil, err + } + + for _, t := range templates { + bts, err := templatesFS.ReadFile(t.Name + ".gotmpl") + if err != nil { + return nil, err + } + + // normalize line endings + t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n")) + } + + return templates, nil +}) + +type named struct { + Name string `json:"name"` + Template string `json:"template"` + Bytes []byte +} + +func (t named) Reader() io.Reader { + return bytes.NewReader(t.Bytes) +} + +func Named(s string) (*named, error) { + templates, err := templatesOnce() + if err != nil { + return nil, 
err + } + + var template *named + score := math.MaxInt + for _, t := range templates { + if s := levenshtein.ComputeDistance(s, t.Template); s < score { + score = s + template = t + } + } + + if score < 100 { + return template, nil + } + + return nil, errors.New("no matching template found") +} + +type Template struct { + *template.Template + raw string +} + +func (t *Template) String() string { + return t.raw +} + +var DefaultTemplate, _ = Parse("{{ .Prompt }}") + +func Parse(s string) (*Template, error) { + t, err := template.New("").Option("missingkey=zero").Parse(s) + if err != nil { + return nil, err + } + + return &Template{Template: t, raw: s}, nil +} + +func (t *Template) Vars() []string { + var vars []string + for _, n := range t.Tree.Root.Nodes { + vars = append(vars, parseNode(n)...) + } + + set := make(map[string]struct{}) + for _, n := range vars { + set[strings.ToLower(n)] = struct{}{} + } + + vars = maps.Keys(set) + slices.Sort(vars) + return vars +} + +func parseNode(n parse.Node) []string { + switch n := n.(type) { + case *parse.ActionNode: + return parseNode(n.Pipe) + case *parse.IfNode: + names := parseNode(n.Pipe) + names = append(names, parseNode(n.List)...) + if n.ElseList != nil { + names = append(names, parseNode(n.ElseList)...) + } + return names + case *parse.RangeNode: + names := parseNode(n.Pipe) + names = append(names, parseNode(n.List)...) + if n.ElseList != nil { + names = append(names, parseNode(n.ElseList)...) + } + return names + case *parse.WithNode: + names := parseNode(n.Pipe) + names = append(names, parseNode(n.List)...) + if n.ElseList != nil { + names = append(names, parseNode(n.ElseList)...) + } + return names + case *parse.PipeNode: + var names []string + for _, c := range n.Cmds { + for _, a := range c.Args { + names = append(names, parseNode(a)...) + } + } + return names + case *parse.ListNode: + var names []string + for _, n := range n.Nodes { + names = append(names, parseNode(n)...) 
+ } + + return names + case *parse.FieldNode: + return n.Ident + } + + return nil +} diff --git a/template/template_test.go b/template/template_test.go new file mode 100644 index 00000000..e5405bdb --- /dev/null +++ b/template/template_test.go @@ -0,0 +1,89 @@ +package template + +import ( + "bufio" + "bytes" + "encoding/json" + "io" + "os" + "path/filepath" + "slices" + "testing" + "text/template" + + "github.com/ollama/ollama/llm" +) + +func TestNamed(t *testing.T) { + f, err := os.Open(filepath.Join("testdata", "templates.jsonl")) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + var ss map[string]string + if err := json.Unmarshal(scanner.Bytes(), &ss); err != nil { + t.Fatal(err) + } + + for k, v := range ss { + t.Run(k, func(t *testing.T) { + kv := llm.KV{"tokenizer.chat_template": v} + s := kv.ChatTemplate() + r, err := Named(s) + if err != nil { + t.Fatal(err) + } + + if r.Name != k { + t.Errorf("expected %q, got %q", k, r.Name) + } + + var b bytes.Buffer + if _, err := io.Copy(&b, r.Reader()); err != nil { + t.Fatal(err) + } + + tmpl, err := template.New(s).Parse(b.String()) + if err != nil { + t.Fatal(err) + } + + if tmpl.Tree.Root.String() == "" { + t.Errorf("empty %s template", k) + } + }) + } + } +} + +func TestParse(t *testing.T) { + cases := []struct { + template string + capabilities []string + }{ + {"{{ .Prompt }}", []string{"prompt"}}, + {"{{ .System }} {{ .Prompt }}", []string{"prompt", "system"}}, + {"{{ .System }} {{ .Prompt }} {{ .Response }}", []string{"prompt", "response", "system"}}, + {"{{ with .Tools }}{{ . 
}}{{ end }} {{ .System }} {{ .Prompt }}", []string{"prompt", "system", "tools"}}, + {"{{ range .Messages }}{{ .Role }} {{ .Content }}{{ end }}", []string{"content", "messages", "role"}}, + {"{{ range .Messages }}{{ if eq .Role \"system\" }}SYSTEM: {{ .Content }}{{ else if eq .Role \"user\" }}USER: {{ .Content }}{{ else if eq .Role \"assistant\" }}ASSISTANT: {{ .Content }}{{ end }}{{ end }}", []string{"content", "messages", "role"}}, + {"{{ .Prompt }} {{ .Suffix }}", []string{"prompt", "suffix"}}, + } + + for _, tt := range cases { + t.Run("", func(t *testing.T) { + tmpl, err := Parse(tt.template) + if err != nil { + t.Fatal(err) + } + + vars := tmpl.Vars() + if !slices.Equal(tt.capabilities, vars) { + t.Errorf("expected %v, got %v", tt.capabilities, vars) + } + }) + } +} diff --git a/templates/testdata/templates.jsonl b/template/testdata/templates.jsonl similarity index 100% rename from templates/testdata/templates.jsonl rename to template/testdata/templates.jsonl diff --git a/templates/vicuna.gotmpl b/template/vicuna.gotmpl similarity index 100% rename from templates/vicuna.gotmpl rename to template/vicuna.gotmpl diff --git a/templates/zephyr.gotmpl b/template/zephyr.gotmpl similarity index 100% rename from templates/zephyr.gotmpl rename to template/zephyr.gotmpl diff --git a/templates/template.go b/templates/template.go deleted file mode 100644 index 72bd69e9..00000000 --- a/templates/template.go +++ /dev/null @@ -1,70 +0,0 @@ -package templates - -import ( - "bytes" - "embed" - "encoding/json" - "errors" - "io" - "math" - "sync" - - "github.com/agnivade/levenshtein" -) - -//go:embed index.json -var indexBytes []byte - -//go:embed *.gotmpl -var templatesFS embed.FS - -var templatesOnce = sync.OnceValues(func() ([]*Template, error) { - var templates []*Template - if err := json.Unmarshal(indexBytes, &templates); err != nil { - return nil, err - } - - for _, t := range templates { - bts, err := templatesFS.ReadFile(t.Name + ".gotmpl") - if err != nil { - return 
nil, err - } - - // normalize line endings - t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n")) - } - - return templates, nil -}) - -type Template struct { - Name string `json:"name"` - Template string `json:"template"` - Bytes []byte -} - -func (t Template) Reader() io.Reader { - return bytes.NewReader(t.Bytes) -} - -func NamedTemplate(s string) (*Template, error) { - templates, err := templatesOnce() - if err != nil { - return nil, err - } - - var template *Template - score := math.MaxInt - for _, t := range templates { - if s := levenshtein.ComputeDistance(s, t.Template); s < score { - score = s - template = t - } - } - - if score < 100 { - return template, nil - } - - return nil, errors.New("no matching template found") -} diff --git a/templates/template_test.go b/templates/template_test.go deleted file mode 100644 index 61bc7837..00000000 --- a/templates/template_test.go +++ /dev/null @@ -1,59 +0,0 @@ -package templates - -import ( - "bufio" - "bytes" - "encoding/json" - "io" - "os" - "path/filepath" - "testing" - "text/template" - - "github.com/ollama/ollama/llm" -) - -func TestKVChatTemplate(t *testing.T) { - f, err := os.Open(filepath.Join("testdata", "templates.jsonl")) - if err != nil { - t.Fatal(err) - } - defer f.Close() - - scanner := bufio.NewScanner(f) - for scanner.Scan() { - var ss map[string]string - if err := json.Unmarshal(scanner.Bytes(), &ss); err != nil { - t.Fatal(err) - } - - for k, v := range ss { - t.Run(k, func(t *testing.T) { - kv := llm.KV{"tokenizer.chat_template": v} - s := kv.ChatTemplate() - r, err := NamedTemplate(s) - if err != nil { - t.Fatal(err) - } - - if r.Name != k { - t.Errorf("expected %q, got %q", k, r.Name) - } - - var b bytes.Buffer - if _, err := io.Copy(&b, r.Reader()); err != nil { - t.Fatal(err) - } - - tmpl, err := template.New(s).Parse(b.String()) - if err != nil { - t.Fatal(err) - } - - if tmpl.Tree.Root.String() == "" { - t.Errorf("empty %s template", k) - } - }) - } - } -} From 
a30915bde166b2f392a0ff72c61c9ac53189a962 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Tue, 11 Jun 2024 14:03:42 -0700 Subject: [PATCH 03/11] add capabilities --- server/images.go | 20 ++++++++++++++++++-- server/routes.go | 8 ++++---- template/template_test.go | 8 ++++---- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/server/images.go b/server/images.go index 65ed51c7..5cd0a7a5 100644 --- a/server/images.go +++ b/server/images.go @@ -34,6 +34,10 @@ import ( "github.com/ollama/ollama/version" ) +type Capability string + +const CapabilityCompletion = Capability("completion") + type registryOptions struct { Insecure bool Username string @@ -58,8 +62,20 @@ type Model struct { Template *template.Template } -func (m *Model) IsEmbedding() bool { - return slices.Contains(m.Config.ModelFamilies, "bert") || slices.Contains(m.Config.ModelFamilies, "nomic-bert") +func (m *Model) Has(caps ...Capability) bool { + for _, cap := range caps { + switch cap { + case CapabilityCompletion: + if slices.Contains(m.Config.ModelFamilies, "bert") || slices.Contains(m.Config.ModelFamilies, "nomic-bert") { + return false + } + default: + slog.Error("unknown capability", "capability", cap) + return false + } + } + + return true } func (m *Model) String() string { diff --git a/server/routes.go b/server/routes.go index d8a4a67e..8ca6dcc8 100644 --- a/server/routes.go +++ b/server/routes.go @@ -122,8 +122,8 @@ func (s *Server) GenerateHandler(c *gin.Context) { return } - if model.IsEmbedding() { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "embedding models do not support generate"}) + if !model.Has(CapabilityCompletion) { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%s does not support generate", req.Model)}) return } @@ -1308,8 +1308,8 @@ func (s *Server) ChatHandler(c *gin.Context) { return } - if model.IsEmbedding() { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "embedding models do not support chat"}) + if 
!model.Has(CapabilityCompletion) { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%s does not support chat", req.Model)}) return } diff --git a/template/template_test.go b/template/template_test.go index e5405bdb..eda4634f 100644 --- a/template/template_test.go +++ b/template/template_test.go @@ -61,8 +61,8 @@ func TestNamed(t *testing.T) { func TestParse(t *testing.T) { cases := []struct { - template string - capabilities []string + template string + vars []string }{ {"{{ .Prompt }}", []string{"prompt"}}, {"{{ .System }} {{ .Prompt }}", []string{"prompt", "system"}}, @@ -81,8 +81,8 @@ func TestParse(t *testing.T) { } vars := tmpl.Vars() - if !slices.Equal(tt.capabilities, vars) { - t.Errorf("expected %v, got %v", tt.capabilities, vars) + if !slices.Equal(tt.vars, vars) { + t.Errorf("expected %v, got %v", tt.vars, vars) } }) } From da8e2a04479f96ad9c57eaf25ed26b79b239b05c Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Fri, 14 Jun 2024 14:57:49 -0700 Subject: [PATCH 04/11] use kvs to detect embedding models --- server/images.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/server/images.go b/server/images.go index 5cd0a7a5..a62991f1 100644 --- a/server/images.go +++ b/server/images.go @@ -66,7 +66,21 @@ func (m *Model) Has(caps ...Capability) bool { for _, cap := range caps { switch cap { case CapabilityCompletion: - if slices.Contains(m.Config.ModelFamilies, "bert") || slices.Contains(m.Config.ModelFamilies, "nomic-bert") { + f, err := os.Open(m.ModelPath) + if err != nil { + slog.Error("couldn't open model file", "error", err) + continue + } + defer f.Close() + + // TODO(mxyng): decode the GGML into model to avoid doing this multiple times + ggml, _, err := llm.DecodeGGML(f, 0) + if err != nil { + slog.Error("couldn't decode ggml", "error", err) + continue + } + + if _, ok := ggml.KV()[fmt.Sprintf("%s.pooling_type", ggml.KV().Architecture())]; ok { return false } default: From 
88bcd79bb9a4b2baa739efe2ccabcbcf3c89bdb5 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Sun, 30 Jun 2024 11:10:40 -0700 Subject: [PATCH 05/11] err on insecure path --- server/model.go | 8 +++----- server/model_test.go | 24 ++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/server/model.go b/server/model.go index d56e641b..7d5957a1 100644 --- a/server/model.go +++ b/server/model.go @@ -11,7 +11,6 @@ import ( "net/http" "os" "path/filepath" - "strings" "github.com/ollama/ollama/api" "github.com/ollama/ollama/convert" @@ -91,12 +90,11 @@ func extractFromZipFile(p string, file *os.File, fn func(api.ProgressResponse)) fn(api.ProgressResponse{Status: "unpacking model metadata"}) for _, f := range r.File { - n := filepath.Join(p, f.Name) - if !strings.HasPrefix(n, p) { - slog.Warn("skipped extracting file outside of context", "name", f.Name) - continue + if !filepath.IsLocal(f.Name) { + return fmt.Errorf("%w: %s", zip.ErrInsecurePath, f.Name) } + n := filepath.Join(p, f.Name) if err := os.MkdirAll(filepath.Dir(n), 0o750); err != nil { return err } diff --git a/server/model_test.go b/server/model_test.go index c3023eb2..a383b7e7 100644 --- a/server/model_test.go +++ b/server/model_test.go @@ -3,10 +3,12 @@ package server import ( "archive/zip" "bytes" + "errors" "io" "os" "path/filepath" "slices" + "strings" "testing" "github.com/ollama/ollama/api" @@ -39,13 +41,31 @@ func TestExtractFromZipFile(t *testing.T) { cases := []struct { name string expect []string + err error }{ { name: "good", expect: []string{"good"}, }, { - name: filepath.Join("..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "bad"), + name: strings.Join([]string{"path", "..", "to", "good"}, string(os.PathSeparator)), + expect: []string{filepath.Join("to", "good")}, + }, + { + name: strings.Join([]string{"path", "..", "to", "..", "good"}, string(os.PathSeparator)), + expect: []string{"good"}, + }, + { + name: 
strings.Join([]string{"path", "to", "..", "..", "good"}, string(os.PathSeparator)), + expect: []string{"good"}, + }, + { + name: strings.Join([]string{"..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "bad"}, string(os.PathSeparator)), + err: zip.ErrInsecurePath, + }, + { + name: strings.Join([]string{"path", "..", "..", "to", "bad"}, string(os.PathSeparator)), + err: zip.ErrInsecurePath, }, } @@ -55,7 +75,7 @@ func TestExtractFromZipFile(t *testing.T) { defer f.Close() tempDir := t.TempDir() - if err := extractFromZipFile(tempDir, f, func(api.ProgressResponse) {}); err != nil { + if err := extractFromZipFile(tempDir, f, func(api.ProgressResponse) {}); !errors.Is(err, tt.err) { t.Fatal(err) } From 4f67b39d262b1997aa96c47585f1d8e8443d0f90 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 2 Jul 2024 09:22:17 -0700 Subject: [PATCH 06/11] Centos 7 EOL broke mirrors As of July 1st 2024: Could not resolve host: mirrorlist.centos.org This is expected due to EOL dates. 
--- scripts/rh_linux_deps.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/rh_linux_deps.sh b/scripts/rh_linux_deps.sh index ed60e430..81648d68 100644 --- a/scripts/rh_linux_deps.sh +++ b/scripts/rh_linux_deps.sh @@ -6,10 +6,21 @@ set -ex MACHINE=$(uname -m) if grep -i "centos" /etc/system-release >/dev/null; then + # As of 7/1/2024 mirrorlist.centos.org has been taken offline, so adjust accordingly + sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo + sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo + sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo + # Centos 7 derivatives have too old of a git version to run our generate script # uninstall and ignore failures yum remove -y git yum -y install epel-release centos-release-scl + + # The release packages reinstate the mirrors, undo that again + sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo + sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo + sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo + yum -y install dnf if [ "${MACHINE}" = "x86_64" ]; then yum -y install https://repo.ius.io/ius-release-el7.rpm From 020bd60ab2f156661b072515cd2c27d59b956535 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 2 Jul 2024 10:23:05 -0700 Subject: [PATCH 07/11] Switch amd container image base to rocky 8 The centos 7 arm mirrors have disappeared due to the EOL 2 days ago, and the vault sed workaround which works for x86 doesn't work for arm. 
--- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 98a3ddfd..b2c5c4a2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -70,12 +70,12 @@ RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64 RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh -FROM --platform=linux/arm64 centos:7 AS cpu-builder-arm64 +FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64 ARG CMAKE_VERSION ARG GOLANG_VERSION COPY ./scripts/rh_linux_deps.sh / RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh -ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH +ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ ARG OLLAMA_CUSTOM_CPU_DEFS ARG CGO_CFLAGS From 996bb1b85e0c1b3ae64246a50ea412dc2a2e30d8 Mon Sep 17 00:00:00 2001 From: royjhan <65097070+royjhan@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:50:56 -0700 Subject: [PATCH 08/11] OpenAI: /v1/models and /v1/models/{model} compatibility (#5007) * OpenAI v1 models * Refactor Writers * Add Test Co-Authored-By: Attila Kerekes * Credit Co-Author Co-Authored-By: Attila Kerekes <439392+keriati@users.noreply.github.com> * Empty List Testing * Use Namespace for Ownedby * Update Test * Add back envconfig * v1/models docs * Use ModelName Parser * Test Names * Remove Docs * Clean Up * Test name Co-authored-by: Jeffrey Morgan * Add Middleware for Chat and List * Testing Cleanup * Test with Fatal * Add functionality to chat test * OpenAI: /v1/models/{model} compatibility (#5028) * Retrieve Model * OpenAI Delete Model * Retrieve Middleware * Remove Delete from Branch * Update Test * Middleware Test File * Function name * Cleanup * Test Update * Test Update --------- Co-authored-by: Attila Kerekes <439392+keriati@users.noreply.github.com> Co-authored-by: Jeffrey Morgan --- 
api/types.go | 7 ++ docs/openai.md | 1 + openai/openai.go | 163 ++++++++++++++++++++++++++++++++++++---- openai/openai_test.go | 170 ++++++++++++++++++++++++++++++++++++++++++ server/routes.go | 4 +- server/routes_test.go | 56 ++++++++++++++ 6 files changed, 387 insertions(+), 14 deletions(-) create mode 100644 openai/openai_test.go diff --git a/api/types.go b/api/types.go index 95ed5d37..428281ba 100644 --- a/api/types.go +++ b/api/types.go @@ -345,6 +345,13 @@ type ProcessModelResponse struct { SizeVRAM int64 `json:"size_vram"` } +type RetrieveModelResponse struct { + Id string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + OwnedBy string `json:"owned_by"` +} + type TokenResponse struct { Token string `json:"token"` } diff --git a/docs/openai.md b/docs/openai.md index 81b967eb..9dda05c3 100644 --- a/docs/openai.md +++ b/docs/openai.md @@ -65,6 +65,7 @@ curl http://localhost:11434/v1/chat/completions \ } ] }' + ``` ## Endpoints diff --git a/openai/openai.go b/openai/openai.go index 706d31aa..01da4440 100644 --- a/openai/openai.go +++ b/openai/openai.go @@ -12,6 +12,7 @@ import ( "github.com/gin-gonic/gin" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/types/model" ) type Error struct { @@ -85,6 +86,18 @@ type ChatCompletionChunk struct { Choices []ChunkChoice `json:"choices"` } +type Model struct { + Id string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + OwnedBy string `json:"owned_by"` +} + +type ListCompletion struct { + Object string `json:"object"` + Data []Model `json:"data"` +} + func NewError(code int, message string) ErrorResponse { var etype string switch code { @@ -145,7 +158,33 @@ func toChunk(id string, r api.ChatResponse) ChatCompletionChunk { } } -func fromRequest(r ChatCompletionRequest) api.ChatRequest { +func toListCompletion(r api.ListResponse) ListCompletion { + var data []Model + for _, m := range r.Models { + data = append(data, Model{ + Id: m.Name, + Object: 
"model", + Created: m.ModifiedAt.Unix(), + OwnedBy: model.ParseName(m.Name).Namespace, + }) + } + + return ListCompletion{ + Object: "list", + Data: data, + } +} + +func toModel(r api.ShowResponse, m string) Model { + return Model{ + Id: m, + Object: "model", + Created: r.ModifiedAt.Unix(), + OwnedBy: model.ParseName(m).Namespace, + } +} + +func fromChatRequest(r ChatCompletionRequest) api.ChatRequest { var messages []api.Message for _, msg := range r.Messages { messages = append(messages, api.Message{Role: msg.Role, Content: msg.Content}) @@ -208,13 +247,26 @@ func fromRequest(r ChatCompletionRequest) api.ChatRequest { } } -type writer struct { - stream bool - id string +type BaseWriter struct { gin.ResponseWriter } -func (w *writer) writeError(code int, data []byte) (int, error) { +type ChatWriter struct { + stream bool + id string + BaseWriter +} + +type ListWriter struct { + BaseWriter +} + +type RetrieveWriter struct { + BaseWriter + model string +} + +func (w *BaseWriter) writeError(code int, data []byte) (int, error) { var serr api.StatusError err := json.Unmarshal(data, &serr) if err != nil { @@ -230,7 +282,7 @@ func (w *writer) writeError(code int, data []byte) (int, error) { return len(data), nil } -func (w *writer) writeResponse(data []byte) (int, error) { +func (w *ChatWriter) writeResponse(data []byte) (int, error) { var chatResponse api.ChatResponse err := json.Unmarshal(data, &chatResponse) if err != nil { @@ -270,7 +322,7 @@ func (w *writer) writeResponse(data []byte) (int, error) { return len(data), nil } -func (w *writer) Write(data []byte) (int, error) { +func (w *ChatWriter) Write(data []byte) (int, error) { code := w.ResponseWriter.Status() if code != http.StatusOK { return w.writeError(code, data) @@ -279,7 +331,92 @@ func (w *writer) Write(data []byte) (int, error) { return w.writeResponse(data) } -func Middleware() gin.HandlerFunc { +func (w *ListWriter) writeResponse(data []byte) (int, error) { + var listResponse api.ListResponse + err := 
json.Unmarshal(data, &listResponse) + if err != nil { + return 0, err + } + + w.ResponseWriter.Header().Set("Content-Type", "application/json") + err = json.NewEncoder(w.ResponseWriter).Encode(toListCompletion(listResponse)) + if err != nil { + return 0, err + } + + return len(data), nil +} + +func (w *ListWriter) Write(data []byte) (int, error) { + code := w.ResponseWriter.Status() + if code != http.StatusOK { + return w.writeError(code, data) + } + + return w.writeResponse(data) +} + +func (w *RetrieveWriter) writeResponse(data []byte) (int, error) { + var showResponse api.ShowResponse + err := json.Unmarshal(data, &showResponse) + if err != nil { + return 0, err + } + + // retrieve completion + w.ResponseWriter.Header().Set("Content-Type", "application/json") + err = json.NewEncoder(w.ResponseWriter).Encode(toModel(showResponse, w.model)) + if err != nil { + return 0, err + } + + return len(data), nil +} + +func (w *RetrieveWriter) Write(data []byte) (int, error) { + code := w.ResponseWriter.Status() + if code != http.StatusOK { + return w.writeError(code, data) + } + + return w.writeResponse(data) +} + +func ListMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + w := &ListWriter{ + BaseWriter: BaseWriter{ResponseWriter: c.Writer}, + } + + c.Writer = w + + c.Next() + } +} + +func RetrieveMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + var b bytes.Buffer + if err := json.NewEncoder(&b).Encode(api.ShowRequest{Name: c.Param("model")}); err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, NewError(http.StatusInternalServerError, err.Error())) + return + } + + c.Request.Body = io.NopCloser(&b) + + // response writer + w := &RetrieveWriter{ + BaseWriter: BaseWriter{ResponseWriter: c.Writer}, + model: c.Param("model"), + } + + c.Writer = w + + c.Next() + } +} + +func ChatMiddleware() gin.HandlerFunc { return func(c *gin.Context) { var req ChatCompletionRequest err := c.ShouldBindJSON(&req) @@ -294,17 +431,17 @@ func 
Middleware() gin.HandlerFunc { } var b bytes.Buffer - if err := json.NewEncoder(&b).Encode(fromRequest(req)); err != nil { + if err := json.NewEncoder(&b).Encode(fromChatRequest(req)); err != nil { c.AbortWithStatusJSON(http.StatusInternalServerError, NewError(http.StatusInternalServerError, err.Error())) return } c.Request.Body = io.NopCloser(&b) - w := &writer{ - ResponseWriter: c.Writer, - stream: req.Stream, - id: fmt.Sprintf("chatcmpl-%d", rand.Intn(999)), + w := &ChatWriter{ + BaseWriter: BaseWriter{ResponseWriter: c.Writer}, + stream: req.Stream, + id: fmt.Sprintf("chatcmpl-%d", rand.Intn(999)), } c.Writer = w diff --git a/openai/openai_test.go b/openai/openai_test.go new file mode 100644 index 00000000..1f335b96 --- /dev/null +++ b/openai/openai_test.go @@ -0,0 +1,170 @@ +package openai + +import ( + "bytes" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/gin-gonic/gin" + "github.com/ollama/ollama/api" + "github.com/stretchr/testify/assert" +) + +func TestMiddleware(t *testing.T) { + type testCase struct { + Name string + Method string + Path string + TestPath string + Handler func() gin.HandlerFunc + Endpoint func(c *gin.Context) + Setup func(t *testing.T, req *http.Request) + Expected func(t *testing.T, resp *httptest.ResponseRecorder) + } + + testCases := []testCase{ + { + Name: "chat handler", + Method: http.MethodPost, + Path: "/api/chat", + TestPath: "/api/chat", + Handler: ChatMiddleware, + Endpoint: func(c *gin.Context) { + var chatReq api.ChatRequest + if err := c.ShouldBindJSON(&chatReq); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request"}) + return + } + + userMessage := chatReq.Messages[0].Content + var assistantMessage string + + switch userMessage { + case "Hello": + assistantMessage = "Hello!" + default: + assistantMessage = "I'm not sure how to respond to that." 
+ } + + c.JSON(http.StatusOK, api.ChatResponse{ + Message: api.Message{ + Role: "assistant", + Content: assistantMessage, + }, + }) + }, + Setup: func(t *testing.T, req *http.Request) { + body := ChatCompletionRequest{ + Model: "test-model", + Messages: []Message{{Role: "user", Content: "Hello"}}, + } + + bodyBytes, _ := json.Marshal(body) + + req.Body = io.NopCloser(bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + }, + Expected: func(t *testing.T, resp *httptest.ResponseRecorder) { + var chatResp ChatCompletion + if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil { + t.Fatal(err) + } + + if chatResp.Object != "chat.completion" { + t.Fatalf("expected chat.completion, got %s", chatResp.Object) + } + + if chatResp.Choices[0].Message.Content != "Hello!" { + t.Fatalf("expected Hello!, got %s", chatResp.Choices[0].Message.Content) + } + }, + }, + { + Name: "list handler", + Method: http.MethodGet, + Path: "/api/tags", + TestPath: "/api/tags", + Handler: ListMiddleware, + Endpoint: func(c *gin.Context) { + c.JSON(http.StatusOK, api.ListResponse{ + Models: []api.ListModelResponse{ + { + Name: "Test Model", + }, + }, + }) + }, + Expected: func(t *testing.T, resp *httptest.ResponseRecorder) { + var listResp ListCompletion + if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil { + t.Fatal(err) + } + + if listResp.Object != "list" { + t.Fatalf("expected list, got %s", listResp.Object) + } + + if len(listResp.Data) != 1 { + t.Fatalf("expected 1, got %d", len(listResp.Data)) + } + + if listResp.Data[0].Id != "Test Model" { + t.Fatalf("expected Test Model, got %s", listResp.Data[0].Id) + } + }, + }, + { + Name: "retrieve model", + Method: http.MethodGet, + Path: "/api/show/:model", + TestPath: "/api/show/test-model", + Handler: RetrieveMiddleware, + Endpoint: func(c *gin.Context) { + c.JSON(http.StatusOK, api.ShowResponse{ + ModifiedAt: time.Date(2024, 6, 17, 13, 45, 0, 0, time.UTC), + }) + }, + Expected: func(t 
*testing.T, resp *httptest.ResponseRecorder) { + var retrieveResp Model + if err := json.NewDecoder(resp.Body).Decode(&retrieveResp); err != nil { + t.Fatal(err) + } + + if retrieveResp.Object != "model" { + t.Fatalf("Expected object to be model, got %s", retrieveResp.Object) + } + + if retrieveResp.Id != "test-model" { + t.Fatalf("Expected id to be test-model, got %s", retrieveResp.Id) + } + }, + }, + } + + gin.SetMode(gin.TestMode) + router := gin.New() + + for _, tc := range testCases { + t.Run(tc.Name, func(t *testing.T) { + router = gin.New() + router.Use(tc.Handler()) + router.Handle(tc.Method, tc.Path, tc.Endpoint) + req, _ := http.NewRequest(tc.Method, tc.TestPath, nil) + + if tc.Setup != nil { + tc.Setup(t, req) + } + + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + + assert.Equal(t, http.StatusOK, resp.Code) + + tc.Expected(t, resp) + }) + } +} diff --git a/server/routes.go b/server/routes.go index 76ead072..ad236450 100644 --- a/server/routes.go +++ b/server/routes.go @@ -1039,7 +1039,9 @@ func (s *Server) GenerateRoutes() http.Handler { r.GET("/api/ps", s.ProcessHandler) // Compatibility endpoints - r.POST("/v1/chat/completions", openai.Middleware(), s.ChatHandler) + r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler) + r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler) + r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler) for _, method := range []string{http.MethodGet, http.MethodHead} { r.Handle(method, "/", func(c *gin.Context) { diff --git a/server/routes_test.go b/server/routes_test.go index 5a5c0fbb..50eaf7e9 100644 --- a/server/routes_test.go +++ b/server/routes_test.go @@ -20,6 +20,7 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/llm" + "github.com/ollama/ollama/openai" "github.com/ollama/ollama/parser" "github.com/ollama/ollama/types/model" "github.com/ollama/ollama/version" @@ -105,6 +106,24 @@ func Test_Routes(t 
*testing.T) { assert.Empty(t, len(modelList.Models)) }, }, + { + Name: "openai empty list", + Method: http.MethodGet, + Path: "/v1/models", + Expected: func(t *testing.T, resp *http.Response) { + contentType := resp.Header.Get("Content-Type") + assert.Equal(t, "application/json", contentType) + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var modelList openai.ListCompletion + err = json.Unmarshal(body, &modelList) + require.NoError(t, err) + + assert.Equal(t, "list", modelList.Object) + assert.Empty(t, modelList.Data) + }, + }, { Name: "Tags Handler (yes tags)", Method: http.MethodGet, @@ -128,6 +147,25 @@ func Test_Routes(t *testing.T) { assert.Equal(t, "test-model:latest", modelList.Models[0].Name) }, }, + { + Name: "openai list models with tags", + Method: http.MethodGet, + Path: "/v1/models", + Expected: func(t *testing.T, resp *http.Response) { + contentType := resp.Header.Get("Content-Type") + assert.Equal(t, "application/json", contentType) + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var modelList openai.ListCompletion + err = json.Unmarshal(body, &modelList) + require.NoError(t, err) + + assert.Len(t, modelList.Data, 1) + assert.Equal(t, "test-model:latest", modelList.Data[0].Id) + assert.Equal(t, "library", modelList.Data[0].OwnedBy) + }, + }, { Name: "Create Model Handler", Method: http.MethodPost, @@ -216,6 +254,24 @@ func Test_Routes(t *testing.T) { assert.InDelta(t, 0, showResp.ModelInfo["general.parameter_count"], 1e-9, "Parameter count should be 0") }, }, + { + Name: "openai retrieve model handler", + Method: http.MethodGet, + Path: "/v1/models/show-model", + Expected: func(t *testing.T, resp *http.Response) { + contentType := resp.Header.Get("Content-Type") + assert.Equal(t, "application/json", contentType) + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var retrieveResp api.RetrieveModelResponse + err = json.Unmarshal(body, &retrieveResp) + require.NoError(t, err) + + assert.Equal(t,
"show-model", retrieveResp.Id) + assert.Equal(t, "library", retrieveResp.OwnedBy) + }, + }, } t.Setenv("OLLAMA_MODELS", t.TempDir()) From 69c04eecc4b969149e43d6941f06a7d60dc5d191 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 2 Jul 2024 12:46:14 -0700 Subject: [PATCH 09/11] Add windows radeon concurrency note --- docs/faq.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/faq.md b/docs/faq.md index 841f1d13..57411246 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -266,8 +266,10 @@ If there is insufficient available memory to load a new model request while one Parallel request processing for a given model results in increasing the context size by the number of parallel requests. For example, a 2K context with 4 parallel requests will result in an 8K context and additional memory allocation. -The following server settings may be used to adjust how Ollama handles concurrent requests: +The following server settings may be used to adjust how Ollama handles concurrent requests on most platforms: - `OLLAMA_MAX_LOADED_MODELS` - The maximum number of models that can be loaded concurrently provided they fit in available memory. The default is 3 * the number of GPUs or 3 for CPU inference. - `OLLAMA_NUM_PARALLEL` - The maximum number of parallel requests each model will process at the same time. The default will auto-select either 4 or 1 based on available memory. - `OLLAMA_MAX_QUEUE` - The maximum number of requests Ollama will queue when busy before rejecting additional requests. The default is 512 + +Note: Windows with Radeon GPUs currently defaults to 1 model maximum due to limitations in ROCm v5.7 for available VRAM reporting. Once ROCm v6 is available, Windows Radeon will follow the defaults above. You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPU's VRAM.
\ No newline at end of file From d626b99b547c43e57390cec90ba2ae01adf0f429 Mon Sep 17 00:00:00 2001 From: royjhan <65097070+royjhan@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:01:45 -0700 Subject: [PATCH 10/11] OpenAI: v1/completions compatibility (#5209) * OpenAI v1 models * Refactor Writers * Add Test Co-Authored-By: Attila Kerekes * Credit Co-Author Co-Authored-By: Attila Kerekes <439392+keriati@users.noreply.github.com> * Empty List Testing * Use Namespace for Ownedby * Update Test * Add back envconfig * v1/models docs * Use ModelName Parser * Test Names * Remove Docs * Clean Up * Test name Co-authored-by: Jeffrey Morgan * Add Middleware for Chat and List * Completions Endpoint * Testing Cleanup * Test with Fatal * Add functionality to chat test * Rename function * float types * type cleanup * cleaning * more cleaning * Extra test cases * merge conflicts * merge conflicts * merge conflicts * merge conflicts * cleaning * cleaning --------- Co-authored-by: Attila Kerekes <439392+keriati@users.noreply.github.com> Co-authored-by: Jeffrey Morgan --- openai/openai.go | 223 +++++++++++++++++++++++++++++++++++++++++- openai/openai_test.go | 132 ++++++++++++++++++++++++- server/routes.go | 1 + 3 files changed, 353 insertions(+), 3 deletions(-) diff --git a/openai/openai.go b/openai/openai.go index 01da4440..f1e75bf2 100644 --- a/openai/openai.go +++ b/openai/openai.go @@ -43,6 +43,12 @@ type ChunkChoice struct { FinishReason *string `json:"finish_reason"` } +type CompleteChunkChoice struct { + Text string `json:"text"` + Index int `json:"index"` + FinishReason *string `json:"finish_reason"` +} + type Usage struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` @@ -86,6 +92,39 @@ type ChatCompletionChunk struct { Choices []ChunkChoice `json:"choices"` } +// TODO (https://github.com/ollama/ollama/issues/5259): support []string, []int and [][]int +type CompletionRequest struct { + Model string `json:"model"` + Prompt string 
`json:"prompt"` + FrequencyPenalty float32 `json:"frequency_penalty"` + MaxTokens *int `json:"max_tokens"` + PresencePenalty float32 `json:"presence_penalty"` + Seed *int `json:"seed"` + Stop any `json:"stop"` + Stream bool `json:"stream"` + Temperature *float32 `json:"temperature"` + TopP float32 `json:"top_p"` +} + +type Completion struct { + Id string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` + SystemFingerprint string `json:"system_fingerprint"` + Choices []CompleteChunkChoice `json:"choices"` + Usage Usage `json:"usage,omitempty"` +} + +type CompletionChunk struct { + Id string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Choices []CompleteChunkChoice `json:"choices"` + Model string `json:"model"` + SystemFingerprint string `json:"system_fingerprint"` +} + type Model struct { Id string `json:"id"` Object string `json:"object"` @@ -158,6 +197,52 @@ func toChunk(id string, r api.ChatResponse) ChatCompletionChunk { } } +func toCompletion(id string, r api.GenerateResponse) Completion { + return Completion{ + Id: id, + Object: "text_completion", + Created: r.CreatedAt.Unix(), + Model: r.Model, + SystemFingerprint: "fp_ollama", + Choices: []CompleteChunkChoice{{ + Text: r.Response, + Index: 0, + FinishReason: func(reason string) *string { + if len(reason) > 0 { + return &reason + } + return nil + }(r.DoneReason), + }}, + Usage: Usage{ + // TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count + PromptTokens: r.PromptEvalCount, + CompletionTokens: r.EvalCount, + TotalTokens: r.PromptEvalCount + r.EvalCount, + }, + } +} + +func toCompleteChunk(id string, r api.GenerateResponse) CompletionChunk { + return CompletionChunk{ + Id: id, + Object: "text_completion", + Created: time.Now().Unix(), + Model: r.Model, + SystemFingerprint: "fp_ollama", + Choices: []CompleteChunkChoice{{ + Text: r.Response, + Index: 0, + FinishReason: 
func(reason string) *string { + if len(reason) > 0 { + return &reason + } + return nil + }(r.DoneReason), + }}, + } +} + func toListCompletion(r api.ListResponse) ListCompletion { var data []Model for _, m := range r.Models { @@ -195,7 +280,7 @@ func fromChatRequest(r ChatCompletionRequest) api.ChatRequest { switch stop := r.Stop.(type) { case string: options["stop"] = []string{stop} - case []interface{}: + case []any: var stops []string for _, s := range stop { if str, ok := s.(string); ok { @@ -247,6 +332,52 @@ func fromChatRequest(r ChatCompletionRequest) api.ChatRequest { } } +func fromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) { + options := make(map[string]any) + + switch stop := r.Stop.(type) { + case string: + options["stop"] = []string{stop} + case []string: + options["stop"] = stop + default: + if r.Stop != nil { + return api.GenerateRequest{}, fmt.Errorf("invalid type for 'stop' field: %T", r.Stop) + } + } + + if r.MaxTokens != nil { + options["num_predict"] = *r.MaxTokens + } + + if r.Temperature != nil { + options["temperature"] = *r.Temperature * 2.0 + } else { + options["temperature"] = 1.0 + } + + if r.Seed != nil { + options["seed"] = *r.Seed + } + + options["frequency_penalty"] = r.FrequencyPenalty * 2.0 + + options["presence_penalty"] = r.PresencePenalty * 2.0 + + if r.TopP != 0.0 { + options["top_p"] = r.TopP + } else { + options["top_p"] = 1.0 + } + + return api.GenerateRequest{ + Model: r.Model, + Prompt: r.Prompt, + Options: options, + Stream: &r.Stream, + }, nil +} + type BaseWriter struct { gin.ResponseWriter } @@ -257,6 +388,12 @@ type ChatWriter struct { BaseWriter } +type CompleteWriter struct { + stream bool + id string + BaseWriter +} + type ListWriter struct { BaseWriter } @@ -331,6 +468,55 @@ func (w *ChatWriter) Write(data []byte) (int, error) { return w.writeResponse(data) } +func (w *CompleteWriter) writeResponse(data []byte) (int, error) { + var generateResponse api.GenerateResponse + err := 
json.Unmarshal(data, &generateResponse) + if err != nil { + return 0, err + } + + // completion chunk + if w.stream { + d, err := json.Marshal(toCompleteChunk(w.id, generateResponse)) + if err != nil { + return 0, err + } + + w.ResponseWriter.Header().Set("Content-Type", "text/event-stream") + _, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("data: %s\n\n", d))) + if err != nil { + return 0, err + } + + if generateResponse.Done { + _, err = w.ResponseWriter.Write([]byte("data: [DONE]\n\n")) + if err != nil { + return 0, err + } + } + + return len(data), nil + } + + // completion + w.ResponseWriter.Header().Set("Content-Type", "application/json") + err = json.NewEncoder(w.ResponseWriter).Encode(toCompletion(w.id, generateResponse)) + if err != nil { + return 0, err + } + + return len(data), nil +} + +func (w *CompleteWriter) Write(data []byte) (int, error) { + code := w.ResponseWriter.Status() + if code != http.StatusOK { + return w.writeError(code, data) + } + + return w.writeResponse(data) +} + func (w *ListWriter) writeResponse(data []byte) (int, error) { var listResponse api.ListResponse err := json.Unmarshal(data, &listResponse) @@ -416,6 +602,41 @@ func RetrieveMiddleware() gin.HandlerFunc { } } +func CompletionsMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + var req CompletionRequest + err := c.ShouldBindJSON(&req) + if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, NewError(http.StatusBadRequest, err.Error())) + return + } + + var b bytes.Buffer + genReq, err := fromCompleteRequest(req) + if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, NewError(http.StatusBadRequest, err.Error())) + return + } + + if err := json.NewEncoder(&b).Encode(genReq); err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, NewError(http.StatusInternalServerError, err.Error())) + return + } + + c.Request.Body = io.NopCloser(&b) + + w := &CompleteWriter{ + BaseWriter: BaseWriter{ResponseWriter: c.Writer}, + stream: req.Stream, 
+ id: fmt.Sprintf("cmpl-%d", rand.Intn(999)), + } + + c.Writer = w + + c.Next() + } +} + func ChatMiddleware() gin.HandlerFunc { return func(c *gin.Context) { var req ChatCompletionRequest diff --git a/openai/openai_test.go b/openai/openai_test.go index 1f335b96..4d21382c 100644 --- a/openai/openai_test.go +++ b/openai/openai_test.go @@ -3,9 +3,11 @@ package openai import ( "bytes" "encoding/json" + "fmt" "io" "net/http" "net/http/httptest" + "strings" "testing" "time" @@ -69,6 +71,8 @@ func TestMiddleware(t *testing.T) { req.Header.Set("Content-Type", "application/json") }, Expected: func(t *testing.T, resp *httptest.ResponseRecorder) { + assert.Equal(t, http.StatusOK, resp.Code) + var chatResp ChatCompletion if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil { t.Fatal(err) @@ -83,6 +87,130 @@ func TestMiddleware(t *testing.T) { } }, }, + { + Name: "completions handler", + Method: http.MethodPost, + Path: "/api/generate", + TestPath: "/api/generate", + Handler: CompletionsMiddleware, + Endpoint: func(c *gin.Context) { + c.JSON(http.StatusOK, api.GenerateResponse{ + Response: "Hello!", + }) + }, + Setup: func(t *testing.T, req *http.Request) { + body := CompletionRequest{ + Model: "test-model", + Prompt: "Hello", + } + + bodyBytes, _ := json.Marshal(body) + + req.Body = io.NopCloser(bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + }, + Expected: func(t *testing.T, resp *httptest.ResponseRecorder) { + assert.Equal(t, http.StatusOK, resp.Code) + var completionResp Completion + if err := json.NewDecoder(resp.Body).Decode(&completionResp); err != nil { + t.Fatal(err) + } + + if completionResp.Object != "text_completion" { + t.Fatalf("expected text_completion, got %s", completionResp.Object) + } + + if completionResp.Choices[0].Text != "Hello!" 
{ + t.Fatalf("expected Hello!, got %s", completionResp.Choices[0].Text) + } + }, + }, + { + Name: "completions handler with params", + Method: http.MethodPost, + Path: "/api/generate", + TestPath: "/api/generate", + Handler: CompletionsMiddleware, + Endpoint: func(c *gin.Context) { + var generateReq api.GenerateRequest + if err := c.ShouldBindJSON(&generateReq); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request"}) + return + } + + temperature := generateReq.Options["temperature"].(float64) + var assistantMessage string + + switch temperature { + case 1.6: + assistantMessage = "Received temperature of 1.6" + default: + assistantMessage = fmt.Sprintf("Received temperature of %f", temperature) + } + + c.JSON(http.StatusOK, api.GenerateResponse{ + Response: assistantMessage, + }) + }, + Setup: func(t *testing.T, req *http.Request) { + temp := float32(0.8) + body := CompletionRequest{ + Model: "test-model", + Prompt: "Hello", + Temperature: &temp, + } + + bodyBytes, _ := json.Marshal(body) + + req.Body = io.NopCloser(bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + }, + Expected: func(t *testing.T, resp *httptest.ResponseRecorder) { + assert.Equal(t, http.StatusOK, resp.Code) + var completionResp Completion + if err := json.NewDecoder(resp.Body).Decode(&completionResp); err != nil { + t.Fatal(err) + } + + if completionResp.Object != "text_completion" { + t.Fatalf("expected text_completion, got %s", completionResp.Object) + } + + if completionResp.Choices[0].Text != "Received temperature of 1.6" { + t.Fatalf("expected Received temperature of 1.6, got %s", completionResp.Choices[0].Text) + } + }, + }, + { + Name: "completions handler with error", + Method: http.MethodPost, + Path: "/api/generate", + TestPath: "/api/generate", + Handler: CompletionsMiddleware, + Endpoint: func(c *gin.Context) { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request"}) + }, + Setup: func(t *testing.T, req *http.Request) { 
+ body := CompletionRequest{ + Model: "test-model", + Prompt: "Hello", + } + + bodyBytes, _ := json.Marshal(body) + + req.Body = io.NopCloser(bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + }, + Expected: func(t *testing.T, resp *httptest.ResponseRecorder) { + if resp.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", resp.Code) + } + + if !strings.Contains(resp.Body.String(), `"invalid request"`) { + t.Fatalf("error was not forwarded") + } + }, + }, { Name: "list handler", Method: http.MethodGet, @@ -99,6 +227,8 @@ func TestMiddleware(t *testing.T) { }) }, Expected: func(t *testing.T, resp *httptest.ResponseRecorder) { + assert.Equal(t, http.StatusOK, resp.Code) + var listResp ListCompletion if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil { t.Fatal(err) @@ -162,8 +292,6 @@ func TestMiddleware(t *testing.T) { resp := httptest.NewRecorder() router.ServeHTTP(resp, req) - assert.Equal(t, http.StatusOK, resp.Code) - tc.Expected(t, resp) }) } diff --git a/server/routes.go b/server/routes.go index 9fe5fcc4..41c92084 100644 --- a/server/routes.go +++ b/server/routes.go @@ -1054,6 +1054,7 @@ func (s *Server) GenerateRoutes() http.Handler { // Compatibility endpoints r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler) + r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler) r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler) r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler) From 65a5040e09d34b4e4237a4ac1996e2fb2a112bb3 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Tue, 2 Jul 2024 16:42:17 -0700 Subject: [PATCH 11/11] fix generate template --- server/routes.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/server/routes.go b/server/routes.go index 41c92084..b14a146c 100644 --- a/server/routes.go +++ b/server/routes.go @@ -176,11 +176,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { prompt = 
req.Prompt case req.Prompt != "": if req.Template == "" { - model.Template, err = template.Parse(req.Template) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } + tmpl = model.Template } if req.System == "" {