Merge branch 'ollama:main' into main

Author: likelovewant
Date: 2025-03-01 15:42:15 +08:00
Committed by: GitHub

11 changed files with 171 additions and 20 deletions


@@ -23,6 +23,7 @@ set(GGML_SCHED_MAX_COPIES 4)
 set(GGML_LLAMAFILE ON)
 set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
 set(GGML_CUDA_GRAPHS ON)
+set(GGML_CUDA_FA ON)
 if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
     OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))
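For context, GGML_CUDA_FA is ggml's build-time switch for its CUDA FlashAttention kernels; turning it on here compiles those kernels into the bundled ggml backend.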


@@ -28,7 +28,7 @@
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],
       "cacheVariables": {
-        "CMAKE_CUDA_ARCHITECTURES": "50;60;61;70;75;80;86;87;89;90;90a;100"
+        "CMAKE_CUDA_ARCHITECTURES": "50;60;61;70;75;80;86;87;89;90;90a;120"
       }
     },
     {
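The only change is the last architecture entry: compute capability 10.0 (sm_100, the Blackwell datacenter parts) is swapped for 12.0 (sm_120), which is what consumer Blackwell GPUs such as the RTX 50-series report under CUDA 12.8.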


@@ -408,6 +408,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot)
 - [yla](https://github.com/danielekp/yla) (Web interface to freely interact with your customized models)
 - [LangBot](https://github.com/RockChinQ/LangBot) (LLM-based instant messaging bots platform, with Agents, RAG features, supports multiple platforms)
+- [1Panel](https://github.com/1Panel-dev/1Panel/) (Web-based Linux Server Management Tool)
 
 ### Cloud


@@ -262,7 +262,7 @@ func LoadModelFromFile(modelPath string, params ModelParams) (*Model, error) {
 		cparams.progress_callback_user_data = unsafe.Pointer(&handle)
 	}
 
-	m := Model{c: C.llama_load_model_from_file(C.CString(modelPath), cparams)}
+	m := Model{c: C.llama_model_load_from_file(C.CString(modelPath), cparams)}
 	if m.c == nil {
 		return nil, fmt.Errorf("unable to load model: %s", modelPath)
 	}
@@ -271,12 +271,12 @@ func LoadModelFromFile(modelPath string, params ModelParams) (*Model, error) {
 }
 
 func FreeModel(model *Model) {
-	C.llama_free_model(model.c)
+	C.llama_model_free(model.c)
 }
 
 func NewContextWithModel(model *Model, params ContextParams) (*Context, error) {
 	c := Context{
-		c:          C.llama_new_context_with_model(model.c, params.c),
+		c:          C.llama_init_from_model(model.c, params.c),
 		numThreads: int(params.c.n_threads),
 	}
 	if c.c == nil {
@@ -287,15 +287,15 @@ func NewContextWithModel(model *Model, params ContextParams) (*Context, error) {
 }
 
 func (m *Model) NumVocab() int {
-	return int(C.llama_n_vocab(m.Vocab()))
+	return int(C.llama_vocab_n_tokens(m.Vocab()))
 }
 
 func (m *Model) TokenIsEog(token int) bool {
-	return bool(C.llama_token_is_eog(m.Vocab(), C.llama_token(token)))
+	return bool(C.llama_vocab_is_eog(m.Vocab(), C.llama_token(token)))
 }
 
 func (m *Model) AddBOSToken() bool {
-	return bool(C.llama_add_bos_token(m.Vocab()))
+	return bool(C.llama_vocab_get_add_bos(m.Vocab()))
 }
 
 func (m *Model) ApplyLoraFromFile(context *Context, loraPath string, scale float32, threads int) error {
@@ -478,7 +478,7 @@ func (m *Model) Tokenize(text string, addSpecial bool, parseSpecial bool) ([]int
 }
 
 func (m *Model) NEmbd() int {
-	return int(C.llama_n_embd(m.c))
+	return int(C.llama_model_n_embd(m.c))
 }
 
 func Quantize(infile, outfile string, ftype uint32) error {
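These are mechanical renames tracking llama.cpp's updated C API, which moved vocabulary queries onto the llama_vocab handle (llama_n_vocab becomes llama_vocab_n_tokens, llama_token_is_eog becomes llama_vocab_is_eog, llama_add_bos_token becomes llama_vocab_get_add_bos) and regularized the model lifecycle names (llama_model_load_from_file, llama_model_free, llama_init_from_model, llama_model_n_embd).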


@@ -968,13 +968,14 @@ func Execute(args []string) error {
 	server.cond = sync.NewCond(&server.mu)
 
 	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
 	go server.run(ctx)
 
 	addr := "127.0.0.1:" + strconv.Itoa(*port)
 	listener, err := net.Listen("tcp", addr)
 	if err != nil {
 		fmt.Println("Listen error:", err)
-		cancel()
 		return err
 	}
 	defer listener.Close()
@@ -994,6 +995,5 @@ func Execute(args []string) error {
 		return err
 	}
 
-	cancel()
 	return nil
 }
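The same lifecycle fix is applied to the second runner below.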


@@ -890,13 +890,14 @@ func Execute(args []string) error {
 	server.cond = sync.NewCond(&server.mu)
 
 	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
 	go server.run(ctx)
 
 	addr := "127.0.0.1:" + strconv.Itoa(*port)
 	listener, err := net.Listen("tcp", addr)
 	if err != nil {
 		fmt.Println("Listen error:", err)
-		cancel()
 		return err
 	}
 	defer listener.Close()
@@ -916,6 +917,5 @@ func Execute(args []string) error {
 		return err
 	}
 
-	cancel()
 	return nil
 }
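Both runner hunks make the same change: instead of calling cancel() by hand on each exit path (and having to remember every one), the context is cancelled once via defer immediately after creation, so it is released on every return path including early error returns. A minimal standalone sketch of the pattern, not the runner code itself:

package main

import (
    "context"
    "errors"
    "fmt"
)

// serve mirrors the runners' new shape: a single defer covers every
// return path, so the early error return no longer needs its own
// cancel() call and the success path no longer ends with cancel().
func serve(fail bool) error {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    go func(ctx context.Context) {
        <-ctx.Done() // unblocks when serve returns, on any path
    }(ctx)

    if fail {
        return errors.New("listen error") // deferred cancel still fires
    }
    return nil
}

func main() {
    fmt.Println(serve(true), serve(false))
}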


@@ -8,6 +8,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"io/fs"
 	"log/slog"
 	"net/http"
 	"os"
@@ -34,6 +35,7 @@ var (
 	errOnlyGGUFSupported  = errors.New("supplied file was not in GGUF format")
 	errUnknownType        = errors.New("unknown type")
 	errNeitherFromOrFiles = errors.New("neither 'from' or 'files' was specified")
+	errFilePath           = errors.New("file path must be relative")
 )
@@ -46,6 +48,13 @@ func (s *Server) CreateHandler(c *gin.Context) {
 		return
 	}
 
+	for v := range r.Files {
+		if !fs.ValidPath(v) {
+			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errFilePath.Error()})
+			return
+		}
+	}
+
 	name := model.ParseName(cmp.Or(r.Model, r.Name))
 	if !name.IsValid() {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
@@ -104,7 +113,7 @@ func (s *Server) CreateHandler(c *gin.Context) {
 	if r.Adapters != nil {
 		adapterLayers, err = convertModelFromFiles(r.Adapters, baseLayers, true, fn)
 		if err != nil {
-			for _, badReq := range []error{errNoFilesProvided, errOnlyOneAdapterSupported, errOnlyGGUFSupported, errUnknownType} {
+			for _, badReq := range []error{errNoFilesProvided, errOnlyOneAdapterSupported, errOnlyGGUFSupported, errUnknownType, errFilePath} {
 				if errors.Is(err, badReq) {
 					ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
 					return
@@ -221,8 +230,22 @@ func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, is
 		return nil, err
 	}
 	defer os.RemoveAll(tmpDir)
 
+	// Set up a root to validate paths
+	root, err := os.OpenRoot(tmpDir)
+	if err != nil {
+		return nil, err
+	}
+	defer root.Close()
+
 	for fp, digest := range files {
+		if !fs.ValidPath(fp) {
+			return nil, fmt.Errorf("%w: %s", errFilePath, fp)
+		}
+		if _, err := root.Stat(fp); err != nil && !errors.Is(err, fs.ErrNotExist) {
+			// Path is likely outside the root
+			return nil, fmt.Errorf("%w: %s: %s", errFilePath, err, fp)
+		}
 		blobPath, err := GetBlobsPath(digest)
 		if err != nil {
 			return nil, err
@@ -270,6 +293,7 @@ func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, is
 	if err != nil {
 		return nil, err
 	}
+	defer bin.Close()
 
 	f, _, err := ggml.Decode(bin, 0)
 	if err != nil {
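The validation is layered. fs.ValidPath rejects anything that is not a clean, slash-separated relative path, so absolute paths and any "." or ".." element fail before the filesystem is touched; the os.OpenRoot handle (new in Go 1.24) then confines Stat to tmpDir, so a path that is lexically clean but escapes via a symlink is also caught. A quick standalone sketch of the lexical layer's semantics:

package main

import (
    "fmt"
    "io/fs"
)

func main() {
    for _, p := range []string{
        "model.safetensors",         // ok: clean relative path
        "weights/model.safetensors", // ok: nesting is fine
        "../file.safetensors",       // rejected: ".." element
        "dir/../../other",           // rejected: ".." element
        "/tmp/model.safetensors",    // rejected: rooted path
    } {
        fmt.Printf("%-30q valid=%v\n", p, fs.ValidPath(p))
    }
}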

--- /dev/null
+++ b/server/create_test.go
@@ -0,0 +1,106 @@
+package server
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/ollama/ollama/api"
+)
+
+func TestConvertFromSafetensors(t *testing.T) {
+	t.Setenv("OLLAMA_MODELS", t.TempDir())
+
+	// Helper function to create a new layer and return its digest
+	makeTemp := func(content string) string {
+		l, err := NewLayer(strings.NewReader(content), "application/octet-stream")
+		if err != nil {
+			t.Fatalf("Failed to create layer: %v", err)
+		}
+		return l.Digest
+	}
+
+	// Create a safetensors compatible file with empty JSON content
+	var buf bytes.Buffer
+	headerSize := int64(len("{}"))
+	binary.Write(&buf, binary.LittleEndian, headerSize)
+	buf.WriteString("{}")
+
+	model := makeTemp(buf.String())
+	config := makeTemp(`{
+		"architectures": ["LlamaForCausalLM"],
+		"vocab_size": 32000
+	}`)
+	tokenizer := makeTemp(`{
+		"version": "1.0",
+		"truncation": null,
+		"padding": null,
+		"added_tokens": [
+			{
+				"id": 0,
+				"content": "<|endoftext|>",
+				"single_word": false,
+				"lstrip": false,
+				"rstrip": false,
+				"normalized": false,
+				"special": true
+			}
+		]
+	}`)
+
+	tests := []struct {
+		name     string
+		filePath string
+		wantErr  error
+	}{
+		// Invalid
+		{
+			name:     "InvalidRelativePathShallow",
+			filePath: filepath.Join("..", "file.safetensors"),
+			wantErr:  errFilePath,
+		},
+		{
+			name:     "InvalidRelativePathDeep",
+			filePath: filepath.Join("..", "..", "..", "..", "..", "..", "data", "file.txt"),
+			wantErr:  errFilePath,
+		},
+		{
+			name:     "InvalidNestedPath",
+			filePath: filepath.Join("dir", "..", "..", "..", "..", "..", "other.safetensors"),
+			wantErr:  errFilePath,
+		},
+		{
+			name:     "AbsolutePathOutsideRoot",
+			filePath: filepath.Join(os.TempDir(), "model.safetensors"),
+			wantErr:  errFilePath, // Should fail since it's outside tmpDir
+		},
+		{
+			name:     "ValidRelativePath",
+			filePath: "model.safetensors",
+			wantErr:  nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Create the minimum required file map for convertFromSafetensors
+			files := map[string]string{
+				tt.filePath:      model,
+				"config.json":    config,
+				"tokenizer.json": tokenizer,
+			}
+
+			_, err := convertFromSafetensors(files, nil, false, func(resp api.ProgressResponse) {})
+
+			if (tt.wantErr == nil && err != nil) ||
+				(tt.wantErr != nil && err == nil) ||
+				(tt.wantErr != nil && !errors.Is(err, tt.wantErr)) {
+				t.Errorf("convertFromSafetensors() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
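The model fixture built at the top of the test is the smallest payload that parses as safetensors: an 8-byte little-endian header length (here 2, written by binary.Write) followed by the empty JSON header {} and no tensor data, so the test exercises only the new path validation rather than conversion itself.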


@@ -147,14 +147,23 @@ func (e *Error) UnmarshalJSON(b []byte) error {
 	return nil
 }
 
-var defaultName = func() names.Name {
-	n := names.Parse("registry.ollama.ai/library/_:latest")
+const DefaultMask = "registry.ollama.ai/library/_:latest"
+
+var defaultMask = func() names.Name {
+	n := names.Parse(DefaultMask)
 	if !n.IsFullyQualified() {
-		panic("default name is not fully qualified")
+		panic("default mask is not fully qualified")
 	}
 	return n
 }()
 
+// CompleteName returns a fully qualified name by merging the given name with
+// the default mask. If the name is already fully qualified, it is returned
+// unchanged.
+func CompleteName(name string) string {
+	return names.Merge(names.Parse(name), defaultMask).String()
+}
+
 // Registry is a client for performing push and pull operations against an
 // Ollama registry.
 type Registry struct {
@@ -249,7 +258,7 @@ type PushParams struct {
 //
 // The scheme is returned as provided by [names.ParseExtended].
 func parseName(s, mask string) (scheme string, n names.Name, d blob.Digest, err error) {
-	maskName := defaultName
+	maskName := defaultMask
 	if mask != "" {
 		maskName = names.Parse(mask)
 		if !maskName.IsFullyQualified() {
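Exporting the mask lets callers complete short names the same way the registry does internally: assuming names.Merge only fills fields the parsed name leaves unset, CompleteName("mistral") would yield "registry.ollama.ai/library/mistral:latest", while an already fully qualified name passes through unchanged.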


@@ -86,6 +86,8 @@ func (m *Model) readTensors(fname string) ([]*Tensor, error) {
 		return nil, err
 	}
 
+	endOfHeader := 8 + headerSize // 8 bytes for header size plus the header itself
+
 	// TODO(bmizerany): do something with metadata? This could be another
 	// header read if needed. We also need to figure out if the metadata is
 	// present in only one .safetensors file or if each file may have their
@@ -95,7 +97,8 @@ func (m *Model) readTensors(fname string) ([]*Tensor, error) {
 	tt := make([]*Tensor, 0, len(raws))
 	for name, raw := range raws {
-		if !strings.HasPrefix(name, "model.layer") {
+		if name == "__metadata__" {
+			// TODO(bmizerany): do something with metadata?
 			continue
 		}
 
 		var v struct {
@@ -112,7 +115,8 @@ func (m *Model) readTensors(fname string) ([]*Tensor, error) {
 
 		// TODO(bmizerany): after collecting, validate all offests make
 		// tensors contiguous?
-		begin, end := v.Offsets[0], v.Offsets[1]
+		begin := endOfHeader + v.Offsets[0]
+		end := endOfHeader + v.Offsets[1]
 		if err := checkBeginEnd(finfo.Size(), begin, end); err != nil {
 			return nil, err
 		}
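The underlying format detail: a .safetensors file begins with an 8-byte little-endian header length, then the JSON header, then tensor data, and each tensor's data_offsets are relative to the first byte after the header. Absolute file positions therefore need endOfHeader added, which is what the hunk above fixes. A minimal sketch of the math (hypothetical file name, placeholder offsets):

package main

import (
    "encoding/binary"
    "fmt"
    "os"
)

func main() {
    f, err := os.Open("model.safetensors") // hypothetical input
    if err != nil {
        panic(err)
    }
    defer f.Close()

    // First 8 bytes: little-endian size of the JSON header.
    var headerSize int64
    if err := binary.Read(f, binary.LittleEndian, &headerSize); err != nil {
        panic(err)
    }
    endOfHeader := 8 + headerSize

    // A tensor whose JSON entry says "data_offsets": [begin, end)
    // lives at [endOfHeader+begin, endOfHeader+end) in the file.
    begin, end := int64(0), int64(4096) // placeholder offsets
    fmt.Printf("absolute byte range: [%d, %d)\n", endOfHeader+begin, endOfHeader+end)
}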


@@ -228,6 +228,10 @@ func cmdImport(ctx context.Context, c *blob.DiskCache) error {
 		flag.PrintDefaults()
 	}
 	flag.Parse(args)
+	if *flagAs == "" {
+		return fmt.Errorf("missing -as flag")
+	}
+	as := ollama.CompleteName(*flagAs)
 
 	dir := cmp.Or(flag.Arg(0), ".")
 	fmt.Fprintf(os.Stderr, "Reading %s\n", dir)
@@ -311,7 +315,7 @@ func cmdImport(ctx context.Context, c *blob.DiskCache) error {
 			if err != nil {
 				return err
 			}
-			return c.Link(*flagAs, d)
+			return c.Link(as, d)
 		}()
 	}()
 
@@ -340,6 +344,8 @@ func cmdImport(ctx context.Context, c *blob.DiskCache) error {
 			writeProgress()
 		case err := <-done:
 			writeProgress()
+			fmt.Println()
+			fmt.Println("Successfully imported", as)
 			return err
 		}
 	}
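Failing fast on a missing -as flag means no files are read before the error, and completing the name once up front keeps the c.Link target and the final success message consistent with the fully qualified name the registry client will later expect.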