llm: Enable new memory estimates by default

New memory estimates (see #11090 for more information) are now
enabled automatically for all models running on the Ollama engine,
improving both stability and performance through more accurate sizing
and allocation. Models running on the llama engine will continue to
use the original style of memory estimation.
This commit is contained in:
Jesse Gross
2025-09-11 10:30:18 -07:00
committed by Jesse Gross
parent feb18cd710
commit eb10390de9
2 changed files with 1 additions and 9 deletions

View File

@@ -162,11 +162,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
}
}
newEstimates := textProcessor != nil && envconfig.NewMemoryEstimates()
if newEstimates {
slog.Info("enabling new memory estimates")
}
// Verify the requested context size is <= the model training size
trainCtx := f.KV().ContextLength()
if opts.NumCtx > int(trainCtx) && trainCtx > 0 {
@@ -434,7 +429,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
}
}()
if newEstimates {
if textProcessor != nil {
return &ollamaServer{llmServer: s}, nil
} else {
return &llamaServer{llmServer: s, ggml: f}, nil