From 6aa72830763cf694da998f5305de89701c75cea0 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <dhiltgen@users.noreply.github.com>
Date: Wed, 5 Nov 2025 11:55:17 -0800
Subject: [PATCH] mac: fix stale VRAM data (#12972)

The scheduler updates free VRAM based on currently loaded models.  This was
mutating the persisted list of GPUs, and when coupled with the non-refreshing
logic for Metal, that led to stale low VRAM reporting after unload.  The fix is
to make sure the GPU discovery always returns a copy so the scheduler's GPU list
is in fact ephemeral and doesn't leak any temporary adjustments back into the
persistent list.
---
 discover/runner.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/discover/runner.go b/discover/runner.go
index 65a542e2..4d44dae2 100644
--- a/discover/runner.go
+++ b/discover/runner.go
@@ -237,7 +237,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 	} else {
 		if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
 			// metal never updates free VRAM
-			return devices
+			return append([]ml.DeviceInfo{}, devices...)
 		}
 
 		slog.Debug("refreshing free memory")
@@ -336,7 +336,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 		}
 	}
 
-	return devices
+	return append([]ml.DeviceInfo{}, devices...)
 }
 
 func filterOverlapByLibrary(supported map[string]map[string]map[string]int, needsDelete []bool) {