ggml: Support closing backends

To iteratively find the best memory allocation, we need to be able
to free backend memory so that we can try again.
This commit is contained in:
Jesse Gross
2025-04-17 17:12:01 -07:00
committed by Jesse Gross
parent d7f4f788d1
commit 756c78cfc7
4 changed files with 71 additions and 24 deletions

View File

@@ -877,6 +877,15 @@ func (s *Server) load(
) {
err := s.initModel(mpath, params, lpath, parallel, kvCacheType, kvSize, multiUserCache)
if err != nil {
var noMem ml.ErrNoMem
if errors.As(err, &noMem) {
// We can't yet handle this but in the future we will
s.cache.Close()
if s.model != nil {
s.model.Backend().Close()
}
}
panic(err)
}