Mirror of https://github.com/likelovewant/ollama-for-amd.git (synced 2025-12-21 14:26:30 +00:00)

Commit: log: trace logging for scheduler (#12961)
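The new logutil.Trace calls below emit scheduler breadcrumbs at a verbosity finer than Debug. As a rough sketch of what such a helper looks like on top of log/slog (the level value and package layout here are assumptions, not necessarily this repository's exact implementation):

	// Package logutil: minimal trace-level helper over log/slog (sketch).
	package logutil

	import (
		"context"
		"log/slog"
	)

	// LevelTrace sits below slog.LevelDebug, so these lines are dropped
	// unless the handler is configured at trace verbosity or lower.
	const LevelTrace = slog.LevelDebug - 4

	// Trace logs msg with structured key/value args at trace level.
	func Trace(msg string, args ...any) {
		slog.Log(context.TODO(), LevelTrace, msg, args...)
	}

A handler created with slog.HandlerOptions{Level: LevelTrace} would then surface these messages alongside the existing slog.Debug output.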
@@ -143,6 +143,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
 				slog.Debug("pending request cancelled or timed out, skipping scheduling")
 				continue
 			}
+			logutil.Trace("processing incoming request", "model", pending.model.ModelPath)
 
 			for {
 				var runnerToExpire *runnerRef
@@ -161,6 +162,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
 					runnerToExpire = runner
 				} else {
 					// Runner is usable, return it
+					logutil.Trace("using existing loaded runner", "model", pending.model.ModelPath)
 					pending.useLoadedRunner(runner, s.finishedReqCh)
 					break
 				}
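For context on the fast path traced above: reusing a loaded runner hands the request to the already-running model instead of loading it again. A sketch of the shape of that handoff, with assumed field names (refCount, successCh) rather than the repository's verbatim code:

	// Sketch: hand the pending request an existing runner and arrange for
	// the scheduler to be notified when the request finishes.
	func (req *LlmRequest) useLoadedRunnerSketch(r *runnerRef, finished chan *LlmRequest) {
		r.refMu.Lock()
		r.refCount++ // keep the runner alive while this request uses it
		r.refMu.Unlock()
		req.successCh <- r // unblock the caller with the usable runner
		go func() {
			<-req.ctx.Done() // request finished or cancelled...
			finished <- req  // ...so the scheduler can decrement refCount
		}()
	}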
@@ -174,13 +176,16 @@ func (s *Scheduler) processPending(ctx context.Context) {
 				if pending.opts.NumGPU == 0 {
 					gpus = []ml.DeviceInfo{}
 				} else {
+					logutil.Trace("refreshing GPU list", "model", pending.model.ModelPath)
 					gpus = s.getGpuFn(ctx, runnersSnapshot)
 				}
+				logutil.Trace("refreshing system information", "model", pending.model.ModelPath)
 				systemInfo := s.getSystemInfoFn()
 				if maxRunners <= 0 {
 					// No user specified MaxRunners, so figure out what automatic setting to use for the next load attempt
 					if pending.opts.NumGPU == 0 {
 						// Need to get actual GPU list to set the correct default max models
+						logutil.Trace("refreshing GPU list", "model", pending.model.ModelPath)
 						g := s.getGpuFn(ctx, runnersSnapshot)
 						maxRunners = uint(defaultModelsPerGPU * max(len(g), 1))
 					} else {
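The automatic limit above scales with the number of detected GPUs, and max(len(g), 1) guarantees a CPU-only host still gets one GPU's worth of slots. Illustrative arithmetic only (defaultModelsPerGPU's actual value is defined elsewhere in the scheduler):

	// Assuming defaultModelsPerGPU were 3:
	//   2 GPUs detected   -> maxRunners = uint(3 * max(2, 1)) = 6
	//   0 GPUs (CPU only) -> maxRunners = uint(3 * max(0, 1)) = 3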
@@ -190,6 +195,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
 				}
 
 				// Load model for fitting
+				logutil.Trace("loading model metadata", "model", pending.model.ModelPath)
 				ggml, err := llm.LoadModel(pending.model.ModelPath, 1024)
 				if err != nil {
 					pending.errCh <- err
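Only the model's metadata is needed at this point: the scheduler reads the GGUF headers to estimate memory requirements before deciding where (or whether) the model fits. The second argument to llm.LoadModel presumably bounds how much of large metadata arrays is read, keeping this probe cheap.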
@@ -197,6 +203,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
 				}
 
 				// Update free memory from currently loaded models
+				logutil.Trace("updating free space", "gpu_count", len(gpus), "model", pending.model.ModelPath)
 				s.updateFreeSpace(gpus)
 
 				if loadedCount == 0 {
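updateFreeSpace itself is not part of this diff; the idea of this kind of accounting is to subtract memory already claimed by loaded runners from each GPU's driver-reported free memory, so fit decisions see realistic headroom. A sketch under those assumptions (the field names here are illustrative, not the repository's):

	// Sketch: deduct each loaded runner's per-GPU allocation from the
	// free memory the driver reported for that device.
	func (s *Scheduler) updateFreeSpaceSketch(gpus []ml.DeviceInfo) {
		s.loadedMu.Lock()
		defer s.loadedMu.Unlock()
		for i := range gpus {
			for _, r := range s.loaded {
				gpus[i].FreeMemory -= r.allocatedVRAM[gpus[i].ID] // hypothetical fields
			}
		}
	}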
@@ -208,7 +215,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
 
 					// More than one loaded model, so we have to see if the
 					// new one fits
+					logutil.Trace("loading additional model", "model", pending.model.ModelPath)
 					needEvict := s.loadFn(pending, ggml, systemInfo, gpus, true)
 					if !needEvict {
 						slog.Debug("new model fits with existing models, loading")
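When loadFn reports that the new model does not fit alongside what is already resident (needEvict is true), the surrounding loop presumably selects a runnerToExpire, waits for it to unload, and retries; the trace lines added in this commit mark each decision point along that path. In upstream ollama, trace output is typically enabled by raising log verbosity (e.g. via the OLLAMA_DEBUG environment variable); check this repository's logutil package for the exact gate.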