Mirror of https://github.com/likelovewant/ollama-for-amd.git, synced 2025-12-21 22:33:56 +00:00
ggml: Support closing backends
In order to iteratively find the best memory allocation, we need to be able to free backend memory so we can try again.
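A minimal sketch of the retry loop this change is meant to enable: attempt an allocation, and if it fails with an out-of-memory error, close the backend to release its memory and try a smaller layout. The names here (Backend, Allocate, the BytesNeeded field) are illustrative stand-ins, not the actual ollama/ggml API; only the errors.As pattern mirrors the diff below.

package main

import (
	"errors"
	"fmt"
)

// ErrNoMem stands in for ml.ErrNoMem in this sketch; the field is made up.
type ErrNoMem struct{ BytesNeeded uint64 }

func (e ErrNoMem) Error() string {
	return fmt.Sprintf("insufficient memory: need %d bytes", e.BytesNeeded)
}

// Backend stands in for a ggml backend whose memory can now be released.
type Backend struct{ allocated uint64 }

// Allocate simulates reserving memory and then discovering that more than
// two layers' worth does not fit on the device.
func (b *Backend) Allocate(gpuLayers int) error {
	b.allocated = uint64(gpuLayers) << 30
	if gpuLayers > 2 {
		return ErrNoMem{BytesNeeded: b.allocated}
	}
	return nil
}

// Close frees whatever the backend allocated so another attempt can be made.
func (b *Backend) Close() {
	fmt.Printf("freed %d bytes\n", b.allocated)
	b.allocated = 0
}

func main() {
	for gpuLayers := 4; gpuLayers >= 0; gpuLayers-- {
		b := &Backend{}
		if err := b.Allocate(gpuLayers); err != nil {
			var noMem ErrNoMem
			if errors.As(err, &noMem) {
				// Being able to close the backend is the point of this
				// commit: release the failed attempt's memory, then retry
				// with a smaller layout on the next iteration.
				b.Close()
				continue
			}
			panic(err)
		}
		fmt.Printf("loaded with %d GPU layers\n", gpuLayers)
		return
	}
	fmt.Println("model does not fit even with 0 GPU layers")
}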
@@ -70,6 +70,10 @@ func kvCacheTypeFromStr(s string) ml.DType {
}

func (c *InputCache) Close() {
	if c == nil {
		return
	}

	c.cache.Close()
}
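The nil check above lets callers run c.Close() unconditionally, even if the cache was never created (for example when loading fails early). A tiny standalone sketch of that Go pattern, using made-up types rather than the runner's real ones:

package main

import "fmt"

// inner stands in for the wrapped kv cache in this sketch.
type inner struct{}

func (i *inner) Close() { fmt.Println("inner cache closed") }

// Cache mirrors the shape of InputCache.Close above with hypothetical names.
type Cache struct{ kv *inner }

// Close may be called on a nil *Cache: Go allows method calls on nil pointer
// receivers, and the guard returns before any field is touched.
func (c *Cache) Close() {
	if c == nil {
		return
	}
	c.kv.Close()
}

func main() {
	var never *Cache // never initialized, e.g. setup failed before creation
	never.Close()    // safe: prints nothing, no panic

	(&Cache{kv: &inner{}}).Close() // normal case: closes the wrapped cache
}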
@@ -877,6 +877,15 @@ func (s *Server) load(
) {
	err := s.initModel(mpath, params, lpath, parallel, kvCacheType, kvSize, multiUserCache)
	if err != nil {
		var noMem ml.ErrNoMem
		if errors.As(err, &noMem) {
			// We can't yet handle this but in the future we will
			s.cache.Close()
			if s.model != nil {
				s.model.Backend().Close()
			}
		}

		panic(err)
	}
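As the in-diff comment notes, the runner cannot recover at this point yet: it closes the cache and the model's backend to free their memory and then still panics. That cleanup is the groundwork for the iterative search described in the commit message, where a failed attempt's memory has to be released before the load can be retried with a smaller allocation.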