refactor: use the built-in max/min to simplify the code (#12280)

Signed-off-by: russcoss <russcoss@outlook.com>
This commit is contained in:
russcoss
2025-09-16 20:14:21 -04:00
committed by GitHub
parent b225508c9b
commit 05d53457af
4 changed files with 4 additions and 20 deletions

View File

@@ -382,10 +382,7 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
// load creates a new model based on req and loads it. If requireFull is true then the model must be loaded fully onto GPUs
// (if any). Returns whether the scheduler needs to evict a model to make this one fit.
func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, gpus discover.GpuInfoList, requireFull bool) bool {
-numParallel := int(envconfig.NumParallel())
-if numParallel < 1 {
-numParallel = 1
-}
+numParallel := max(int(envconfig.NumParallel()), 1)
// Embedding models should always be loaded with parallel=1
if req.model.CheckCapabilities(model.CapabilityCompletion) != nil {