Build multiple CPU variants and pick the best

This restricts the built-in Linux build to use no vector extensions,
which enables the resulting builds to run under Rosetta on macOS in
Docker. At runtime, it then checks which vector extensions the CPU
actually supports and loads the best CPU library available.
Author: Daniel Hiltgen
Date:   2024-01-07 15:48:05 -08:00
Parent: 052b33b81b
Commit: d88c527be3
15 changed files with 202 additions and 66 deletions
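
The runtime selection hinges on detecting what the host CPU actually
supports. As a rough sketch of that detection step (this is not the code
from this commit, and the variant names are assumptions for illustration),
Go's golang.org/x/sys/cpu package exposes the relevant feature flags:

package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

// pickCPUVariant returns the most capable CPU library variant the host
// supports, falling back to the plain build with no vector extensions.
// Rosetta does not emulate AVX, so under Rosetta both flags read false
// and the plain "cpu" variant is selected.
func pickCPUVariant() string {
	switch {
	case cpu.X86.HasAVX2:
		return "cpu_avx2"
	case cpu.X86.HasAVX:
		return "cpu_avx"
	default:
		return "cpu"
	}
}

func main() {
	fmt.Println("selected variant:", pickCPUVariant())
}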

llm/ext_server.go (new file, 17 lines)

@@ -0,0 +1,17 @@
//go:build !darwin

package llm

import (
	"fmt"

	"github.com/jmorganca/ollama/api"
)

func newDefaultExtServer(model string, adapters, projectors []string, opts api.Options) (extServer, error) {
	// On windows and linux we always load the llama.cpp libraries dynamically to avoid startup DLL dependencies
	// This ensures we can update the PATH at runtime to get everything loaded
	// This should never happen as we'll always try to load one or more cpu dynamic libraries before hitting default
	return nil, fmt.Errorf("no available default llm library")
}
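
The "should never happen" comment refers to the loader trying the dynamic
CPU variants first and reaching this default only as a last resort. A
self-contained sketch of that fallback order follows; the function names
and variant list are illustrative assumptions, not the API added by this
commit:

package main

import (
	"errors"
	"fmt"
)

// loadVariant stands in for dynamically loading the matching llama.cpp
// build; it is a stub for illustration only.
func loadVariant(name string) error {
	return errors.New("variant not available: " + name)
}

// loadBestCPULibrary walks the variants from most to least capable and
// returns the same error as newDefaultExtServer only when every dynamic
// library fails to load.
func loadBestCPULibrary(variants []string) error {
	for _, v := range variants {
		if err := loadVariant(v); err == nil {
			fmt.Println("loaded", v)
			return nil
		}
	}
	return errors.New("no available default llm library")
}

func main() {
	fmt.Println(loadBestCPULibrary([]string{"cpu_avx2", "cpu_avx", "cpu"}))
}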