Mirror of https://github.com/likelovewant/ollama-for-amd.git, synced 2025-12-21 22:33:56 +00:00
ggml: Support closing backends
In order to iteratively find the best memory allocation, we need to be able to free backend memory so we can try again.
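A minimal sketch of the retry loop this change is meant to enable: attempt an allocation, and if it fails with an out-of-memory error, close the backend to release its memory and try a smaller layout. The names here (Backend, Allocate, the BytesNeeded field) are illustrative stand-ins, not the actual ollama/ggml API; only the errors.As pattern mirrors the diff below.

package main

import (
	"errors"
	"fmt"
)

// ErrNoMem stands in for ml.ErrNoMem in this sketch; the field is made up.
type ErrNoMem struct{ BytesNeeded uint64 }

func (e ErrNoMem) Error() string {
	return fmt.Sprintf("insufficient memory: need %d bytes", e.BytesNeeded)
}

// Backend stands in for a ggml backend whose memory can now be released.
type Backend struct{ allocated uint64 }

// Allocate simulates reserving memory and then discovering that more than
// two layers' worth does not fit on the device.
func (b *Backend) Allocate(gpuLayers int) error {
	b.allocated = uint64(gpuLayers) << 30
	if gpuLayers > 2 {
		return ErrNoMem{BytesNeeded: b.allocated}
	}
	return nil
}

// Close frees whatever the backend allocated so another attempt can be made.
func (b *Backend) Close() {
	fmt.Printf("freed %d bytes\n", b.allocated)
	b.allocated = 0
}

func main() {
	for gpuLayers := 4; gpuLayers >= 0; gpuLayers-- {
		b := &Backend{}
		if err := b.Allocate(gpuLayers); err != nil {
			var noMem ErrNoMem
			if errors.As(err, &noMem) {
				// Being able to close the backend is the point of this
				// commit: release the failed attempt's memory, then retry
				// with a smaller layout on the next iteration.
				b.Close()
				continue
			}
			panic(err)
		}
		fmt.Printf("loaded with %d GPU layers\n", gpuLayers)
		return
	}
	fmt.Println("model does not fit even with 0 GPU layers")
}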
@@ -70,6 +70,10 @@ func kvCacheTypeFromStr(s string) ml.DType {
}

func (c *InputCache) Close() {
	if c == nil {
		return
	}

	c.cache.Close()
}
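The nil check above lets callers run c.Close() unconditionally, even if the cache was never created (for example when loading fails early). A tiny standalone sketch of that Go pattern, using made-up types rather than the runner's real ones:

package main

import "fmt"

// inner stands in for the wrapped kv cache in this sketch.
type inner struct{}

func (i *inner) Close() { fmt.Println("inner cache closed") }

// Cache mirrors the shape of InputCache.Close above with hypothetical names.
type Cache struct{ kv *inner }

// Close may be called on a nil *Cache: Go allows method calls on nil pointer
// receivers, and the guard returns before any field is touched.
func (c *Cache) Close() {
	if c == nil {
		return
	}
	c.kv.Close()
}

func main() {
	var never *Cache // never initialized, e.g. setup failed before creation
	never.Close()    // safe: prints nothing, no panic

	(&Cache{kv: &inner{}}).Close() // normal case: closes the wrapped cache
}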
@@ -877,6 +877,15 @@ func (s *Server) load(
) {
	err := s.initModel(mpath, params, lpath, parallel, kvCacheType, kvSize, multiUserCache)
	if err != nil {
		var noMem ml.ErrNoMem
		if errors.As(err, &noMem) {
			// We can't yet handle this but in the future we will
			s.cache.Close()
			if s.model != nil {
				s.model.Backend().Close()
			}
		}

		panic(err)
	}
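As the in-diff comment notes, the runner cannot recover at this point yet: it closes the cache and the model's backend to free their memory and then still panics. That cleanup is the groundwork for the iterative search described in the commit message, where a failed attempt's memory has to be released before the load can be retried with a smaller allocation.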