From 6aa72830763cf694da998f5305de89701c75cea0 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Wed, 5 Nov 2025 11:55:17 -0800 Subject: [PATCH] mac: fix stale VRAM data (#12972) The scheduler updates free VRAM based on currently loaded models. This was mutating the persisted list of GPUs, and when coupled with the non-refreshing logic for Metal that led to stale low VRAM reporting after unload. The fix is to make sure the GPU discovery always returns a copy so the scheduler's GPU list is in fact ephemeral and doesn't leak any temporary adjustments back into the persistent list. --- discover/runner.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/discover/runner.go b/discover/runner.go index 65a542e2..4d44dae2 100644 --- a/discover/runner.go +++ b/discover/runner.go @@ -237,7 +237,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. } else { if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { // metal never updates free VRAM - return devices + return append([]ml.DeviceInfo{}, devices...) } slog.Debug("refreshing free memory") @@ -336,7 +336,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. } } - return devices + return append([]ml.DeviceInfo{}, devices...) } func filterOverlapByLibrary(supported map[string]map[string]map[string]int, needsDelete []bool) {