diff --git a/CMakeLists.txt b/CMakeLists.txt index e989b127..e57d9f65 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ set(GGML_SCHED_MAX_COPIES 4) set(GGML_LLAMAFILE ON) set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128) set(GGML_CUDA_GRAPHS ON) +set(GGML_CUDA_FA ON) if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+")) diff --git a/CMakePresets.json b/CMakePresets.json index 2a29b8eb..8060bed4 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -28,7 +28,7 @@ "name": "CUDA 12", "inherits": [ "CUDA" ], "cacheVariables": { - "CMAKE_CUDA_ARCHITECTURES": "50;60;61;70;75;80;86;87;89;90;90a;100" + "CMAKE_CUDA_ARCHITECTURES": "50;60;61;70;75;80;86;87;89;90;90a;120" } }, { diff --git a/README.md b/README.md index 67230d18..09c720ad 100644 --- a/README.md +++ b/README.md @@ -408,6 +408,7 @@ See the [API documentation](./docs/api.md) for all endpoints. - [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot) - [yla](https://github.com/danielekp/yla) (Web interface to freely interact with your customized models) - [LangBot](https://github.com/RockChinQ/LangBot) (LLM-based instant messaging bots platform, with Agents, RAG features, supports multiple platforms) +- [1Panel](https://github.com/1Panel-dev/1Panel/) (Web-based Linux Server Management Tool) ### Cloud diff --git a/llama/llama.go b/llama/llama.go index 9add38c2..0c4fca43 100644 --- a/llama/llama.go +++ b/llama/llama.go @@ -262,7 +262,7 @@ func LoadModelFromFile(modelPath string, params ModelParams) (*Model, error) { cparams.progress_callback_user_data = unsafe.Pointer(&handle) } - m := Model{c: C.llama_load_model_from_file(C.CString(modelPath), cparams)} + m := Model{c: C.llama_model_load_from_file(C.CString(modelPath), cparams)} if m.c == nil { return nil, fmt.Errorf("unable to load model: %s", modelPath) } @@ -271,12 +271,12 @@ func LoadModelFromFile(modelPath string, params ModelParams) (*Model, error) { } func FreeModel(model *Model) { - C.llama_free_model(model.c) + C.llama_model_free(model.c) } func NewContextWithModel(model *Model, params ContextParams) (*Context, error) { c := Context{ - c: C.llama_new_context_with_model(model.c, params.c), + c: C.llama_init_from_model(model.c, params.c), numThreads: int(params.c.n_threads), } if c.c == nil { @@ -287,15 +287,15 @@ func NewContextWithModel(model *Model, params ContextParams) (*Context, error) { } func (m *Model) NumVocab() int { - return int(C.llama_n_vocab(m.Vocab())) + return int(C.llama_vocab_n_tokens(m.Vocab())) } func (m *Model) TokenIsEog(token int) bool { - return bool(C.llama_token_is_eog(m.Vocab(), C.llama_token(token))) + return bool(C.llama_vocab_is_eog(m.Vocab(), C.llama_token(token))) } func (m *Model) AddBOSToken() bool { - return bool(C.llama_add_bos_token(m.Vocab())) + return bool(C.llama_vocab_get_add_bos(m.Vocab())) } func (m *Model) ApplyLoraFromFile(context *Context, loraPath string, scale float32, threads int) error { @@ -478,7 +478,7 @@ func (m *Model) Tokenize(text string, addSpecial bool, parseSpecial bool) ([]int } func (m *Model) NEmbd() int { - return int(C.llama_n_embd(m.c)) + return int(C.llama_model_n_embd(m.c)) } func Quantize(infile, outfile string, ftype uint32) error { diff --git a/runner/llamarunner/runner.go b/runner/llamarunner/runner.go index 1afc793e..82880c98 100644 --- a/runner/llamarunner/runner.go +++ b/runner/llamarunner/runner.go @@ -968,13 +968,14 @@ func Execute(args []string) error { server.cond = sync.NewCond(&server.mu) ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go server.run(ctx) addr := "127.0.0.1:" + strconv.Itoa(*port) listener, err := net.Listen("tcp", addr) if err != nil { fmt.Println("Listen error:", err) - cancel() return err } defer listener.Close() @@ -994,6 +995,5 @@ func Execute(args []string) error { return err } - cancel() return nil } diff --git a/runner/ollamarunner/runner.go b/runner/ollamarunner/runner.go index 6b4c7be0..db9b271e 100644 --- a/runner/ollamarunner/runner.go +++ b/runner/ollamarunner/runner.go @@ -890,13 +890,14 @@ func Execute(args []string) error { server.cond = sync.NewCond(&server.mu) ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go server.run(ctx) addr := "127.0.0.1:" + strconv.Itoa(*port) listener, err := net.Listen("tcp", addr) if err != nil { fmt.Println("Listen error:", err) - cancel() return err } defer listener.Close() @@ -916,6 +917,5 @@ func Execute(args []string) error { return err } - cancel() return nil } diff --git a/server/create.go b/server/create.go index 2261116c..4294554b 100644 --- a/server/create.go +++ b/server/create.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "io" + "io/fs" "log/slog" "net/http" "os" @@ -34,6 +35,7 @@ var ( errOnlyGGUFSupported = errors.New("supplied file was not in GGUF format") errUnknownType = errors.New("unknown type") errNeitherFromOrFiles = errors.New("neither 'from' or 'files' was specified") + errFilePath = errors.New("file path must be relative") ) func (s *Server) CreateHandler(c *gin.Context) { @@ -46,6 +48,13 @@ func (s *Server) CreateHandler(c *gin.Context) { return } + for v := range r.Files { + if !fs.ValidPath(v) { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errFilePath.Error()}) + return + } + } + name := model.ParseName(cmp.Or(r.Model, r.Name)) if !name.IsValid() { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg}) @@ -104,7 +113,7 @@ func (s *Server) CreateHandler(c *gin.Context) { if r.Adapters != nil { adapterLayers, err = convertModelFromFiles(r.Adapters, baseLayers, true, fn) if err != nil { - for _, badReq := range []error{errNoFilesProvided, errOnlyOneAdapterSupported, errOnlyGGUFSupported, errUnknownType} { + for _, badReq := range []error{errNoFilesProvided, errOnlyOneAdapterSupported, errOnlyGGUFSupported, errUnknownType, errFilePath} { if errors.Is(err, badReq) { ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest} return @@ -221,8 +230,22 @@ func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, is return nil, err } defer os.RemoveAll(tmpDir) + // Set up a root to validate paths + root, err := os.OpenRoot(tmpDir) + if err != nil { + return nil, err + } + defer root.Close() for fp, digest := range files { + if !fs.ValidPath(fp) { + return nil, fmt.Errorf("%w: %s", errFilePath, fp) + } + if _, err := root.Stat(fp); err != nil && !errors.Is(err, fs.ErrNotExist) { + // Path is likely outside the root + return nil, fmt.Errorf("%w: %s: %s", errFilePath, err, fp) + } + blobPath, err := GetBlobsPath(digest) if err != nil { return nil, err @@ -270,6 +293,7 @@ func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, is if err != nil { return nil, err } + defer bin.Close() f, _, err := ggml.Decode(bin, 0) if err != nil { diff --git a/server/create_test.go b/server/create_test.go new file mode 100644 index 00000000..59a07ff1 --- /dev/null +++ b/server/create_test.go @@ -0,0 +1,106 @@ +package server + +import ( + "bytes" + "encoding/binary" + "errors" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/ollama/ollama/api" +) + +func TestConvertFromSafetensors(t *testing.T) { + t.Setenv("OLLAMA_MODELS", t.TempDir()) + + // Helper function to create a new layer and return its digest + makeTemp := func(content string) string { + l, err := NewLayer(strings.NewReader(content), "application/octet-stream") + if err != nil { + t.Fatalf("Failed to create layer: %v", err) + } + return l.Digest + } + + // Create a safetensors compatible file with empty JSON content + var buf bytes.Buffer + headerSize := int64(len("{}")) + binary.Write(&buf, binary.LittleEndian, headerSize) + buf.WriteString("{}") + + model := makeTemp(buf.String()) + config := makeTemp(`{ + "architectures": ["LlamaForCausalLM"], + "vocab_size": 32000 + }`) + tokenizer := makeTemp(`{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ] + }`) + + tests := []struct { + name string + filePath string + wantErr error + }{ + // Invalid + { + name: "InvalidRelativePathShallow", + filePath: filepath.Join("..", "file.safetensors"), + wantErr: errFilePath, + }, + { + name: "InvalidRelativePathDeep", + filePath: filepath.Join("..", "..", "..", "..", "..", "..", "data", "file.txt"), + wantErr: errFilePath, + }, + { + name: "InvalidNestedPath", + filePath: filepath.Join("dir", "..", "..", "..", "..", "..", "other.safetensors"), + wantErr: errFilePath, + }, + { + name: "AbsolutePathOutsideRoot", + filePath: filepath.Join(os.TempDir(), "model.safetensors"), + wantErr: errFilePath, // Should fail since it's outside tmpDir + }, + { + name: "ValidRelativePath", + filePath: "model.safetensors", + wantErr: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create the minimum required file map for convertFromSafetensors + files := map[string]string{ + tt.filePath: model, + "config.json": config, + "tokenizer.json": tokenizer, + } + + _, err := convertFromSafetensors(files, nil, false, func(resp api.ProgressResponse) {}) + + if (tt.wantErr == nil && err != nil) || + (tt.wantErr != nil && err == nil) || + (tt.wantErr != nil && !errors.Is(err, tt.wantErr)) { + t.Errorf("convertFromSafetensors() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/server/internal/client/ollama/registry.go b/server/internal/client/ollama/registry.go index d4d58ed6..e4c36d7d 100644 --- a/server/internal/client/ollama/registry.go +++ b/server/internal/client/ollama/registry.go @@ -147,14 +147,23 @@ func (e *Error) UnmarshalJSON(b []byte) error { return nil } -var defaultName = func() names.Name { - n := names.Parse("registry.ollama.ai/library/_:latest") +const DefaultMask = "registry.ollama.ai/library/_:latest" + +var defaultMask = func() names.Name { + n := names.Parse(DefaultMask) if !n.IsFullyQualified() { - panic("default name is not fully qualified") + panic("default mask is not fully qualified") } return n }() +// CompleteName returns a fully qualified name by merging the given name with +// the default mask. If the name is already fully qualified, it is returned +// unchanged. +func CompleteName(name string) string { + return names.Merge(names.Parse(name), defaultMask).String() +} + // Registry is a client for performing push and pull operations against an // Ollama registry. type Registry struct { @@ -249,7 +258,7 @@ type PushParams struct { // // The scheme is returned as provided by [names.ParseExtended]. func parseName(s, mask string) (scheme string, n names.Name, d blob.Digest, err error) { - maskName := defaultName + maskName := defaultMask if mask != "" { maskName = names.Parse(mask) if !maskName.IsFullyQualified() { diff --git a/server/internal/cmd/opp/internal/safetensors/safetensors.go b/server/internal/cmd/opp/internal/safetensors/safetensors.go index 7f3e9979..7a45b91d 100644 --- a/server/internal/cmd/opp/internal/safetensors/safetensors.go +++ b/server/internal/cmd/opp/internal/safetensors/safetensors.go @@ -86,6 +86,8 @@ func (m *Model) readTensors(fname string) ([]*Tensor, error) { return nil, err } + endOfHeader := 8 + headerSize // 8 bytes for header size plus the header itself + // TODO(bmizerany): do something with metadata? This could be another // header read if needed. We also need to figure out if the metadata is // present in only one .safetensors file or if each file may have their @@ -95,7 +97,8 @@ func (m *Model) readTensors(fname string) ([]*Tensor, error) { tt := make([]*Tensor, 0, len(raws)) for name, raw := range raws { - if !strings.HasPrefix(name, "model.layer") { + if name == "__metadata__" { + // TODO(bmizerany): do something with metadata? continue } var v struct { @@ -112,7 +115,8 @@ func (m *Model) readTensors(fname string) ([]*Tensor, error) { // TODO(bmizerany): after collecting, validate all offests make // tensors contiguous? - begin, end := v.Offsets[0], v.Offsets[1] + begin := endOfHeader + v.Offsets[0] + end := endOfHeader + v.Offsets[1] if err := checkBeginEnd(finfo.Size(), begin, end); err != nil { return nil, err } diff --git a/server/internal/cmd/opp/opp.go b/server/internal/cmd/opp/opp.go index cc10a72f..c21e71d5 100644 --- a/server/internal/cmd/opp/opp.go +++ b/server/internal/cmd/opp/opp.go @@ -228,6 +228,10 @@ func cmdImport(ctx context.Context, c *blob.DiskCache) error { flag.PrintDefaults() } flag.Parse(args) + if *flagAs == "" { + return fmt.Errorf("missing -as flag") + } + as := ollama.CompleteName(*flagAs) dir := cmp.Or(flag.Arg(0), ".") fmt.Fprintf(os.Stderr, "Reading %s\n", dir) @@ -311,7 +315,7 @@ func cmdImport(ctx context.Context, c *blob.DiskCache) error { if err != nil { return err } - return c.Link(*flagAs, d) + return c.Link(as, d) }() }() @@ -340,6 +344,8 @@ func cmdImport(ctx context.Context, c *blob.DiskCache) error { writeProgress() case err := <-done: writeProgress() + fmt.Println() + fmt.Println("Successfully imported", as) return err } }