Mirror of https://github.com/likelovewant/ollama-for-amd.git, synced 2025-12-21 14:26:30 +00:00
llm: Enable new memory estimates by default
New memory estimates (see #11090 for more information) are now enabled automatically for all models running on the Ollama engine, improving both stability and performance through more accurate sizing and allocation. Models running on the llama engine will continue to use the original style of memory estimation.
@@ -162,11 +162,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 		}
 	}
 
-	newEstimates := textProcessor != nil && envconfig.NewMemoryEstimates()
-	if newEstimates {
-		slog.Info("enabling new memory estimates")
-	}
-
 	// Verify the requested context size is <= the model training size
 	trainCtx := f.KV().ContextLength()
 	if opts.NumCtx > int(trainCtx) && trainCtx > 0 {
@@ -434,7 +429,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 		}
 	}()
 
-	if newEstimates {
+	if textProcessor != nil {
		return &ollamaServer{llmServer: s}, nil
 	} else {
 		return &llamaServer{llmServer: s, ggml: f}, nil
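
In effect, the engine choice alone now decides which estimator is used. The following is a minimal sketch, not the actual Ollama code (the helper chooseServer and its return strings are invented for illustration); it shows the selection collapsing to a single check on whether the model is handled by the new Ollama engine:

package main

import "fmt"

// chooseServer mirrors, in simplified form, the branch NewLlamaServer now
// takes: models handled by the Ollama engine (textProcessor != nil) get the
// new memory estimates via ollamaServer, while models still running on the
// llama engine keep the original estimator via llamaServer.
func chooseServer(onOllamaEngine bool) string {
	if onOllamaEngine {
		return "ollamaServer: new memory estimates"
	}
	return "llamaServer: original memory estimates"
}

func main() {
	fmt.Println(chooseServer(true))  // model runs on the Ollama engine
	fmt.Println(chooseServer(false)) // model runs on the llama engine
}

Because the environment opt-in (envconfig.NewMemoryEstimates) no longer participates in the decision, the intermediate newEstimates variable becomes redundant, and the second hunk branches directly on textProcessor != nil.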