Mirror of https://github.com/likelovewant/ollama-for-amd.git, synced 2025-12-21 14:26:30 +00:00
refactor: use the built-in max/min to simplify the code (#12280)
Signed-off-by: russcoss <russcoss@outlook.com>
@@ -382,10 +382,7 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
 // load creates a new model based on req and loads it. If requireFull is true then the model must be loaded fully onto GPUs
 // (if any). Returns whether the scheduler needs to evict a model to make this one fit.
 func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, gpus discover.GpuInfoList, requireFull bool) bool {
-	numParallel := int(envconfig.NumParallel())
-	if numParallel < 1 {
-		numParallel = 1
-	}
+	numParallel := max(int(envconfig.NumParallel()), 1)
 
 	// Embedding models should always be loaded with parallel=1
 	if req.model.CheckCapabilities(model.CapabilityCompletion) != nil {
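For readers unfamiliar with the change: max and min became predeclared built-ins in Go 1.21, so a lower-bound clamp no longer needs an explicit branch. Below is a minimal, self-contained sketch of the before/after pattern. numParallelFromEnv is a hypothetical stand-in for envconfig.NumParallel(), which reads the requested parallelism from the environment and may return 0 when unset; it is not part of the original diff.

package main

import "fmt"

// numParallelFromEnv is a hypothetical stand-in for envconfig.NumParallel();
// it simulates an unset or zero parallelism setting.
func numParallelFromEnv() uint { return 0 }

func main() {
	// Before the refactor: clamp to a floor of 1 with an explicit branch.
	numParallel := int(numParallelFromEnv())
	if numParallel < 1 {
		numParallel = 1
	}
	fmt.Println(numParallel) // 1

	// After the refactor: Go 1.21's built-in max expresses the same clamp in one line.
	numParallel = max(int(numParallelFromEnv()), 1)
	fmt.Println(numParallel) // 1
}

Note that in both forms the clamp happens after the int conversion, so a zero or unset value still resolves to 1; the one-liner is behavior-preserving with respect to the removed branch.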