mac: fix stale VRAM data (#12972)

The scheduler updates free VRAM based on the currently loaded models.  This was
mutating the persisted list of GPUs, and when coupled with the non-refreshing
logic for Metal, that led to stale low VRAM reporting after unload.  The fix is
to make sure GPU discovery always returns a copy so the scheduler's GPU list
is in fact ephemeral and doesn't leak any temporary adjustments back into the
persistent list.
This commit is contained in:
Daniel Hiltgen
2025-11-05 11:55:17 -08:00
committed by GitHub
parent f89fc1cadd
commit 6aa7283076

View File

@@ -237,7 +237,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
} else { } else {
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
// metal never updates free VRAM // metal never updates free VRAM
return devices return append([]ml.DeviceInfo{}, devices...)
} }
slog.Debug("refreshing free memory") slog.Debug("refreshing free memory")
@@ -336,7 +336,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
} }
} }
return devices return append([]ml.DeviceInfo{}, devices...)
} }
func filterOverlapByLibrary(supported map[string]map[string]map[string]int, needsDelete []bool) { func filterOverlapByLibrary(supported map[string]map[string]map[string]int, needsDelete []bool) {