From 72ff5b9d8c7a07df46f7a7db68a42562ddab2994 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Fri, 14 Nov 2025 14:36:28 -0800
Subject: [PATCH] log: warn if user overrides detected (#13088)

Many failed GPU discovery issues recently can be traced to incorrect
override settings. This extra logging should help quickly spot these and
guide users to try unsetting them first.
---
 discover/runner.go | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/discover/runner.go b/discover/runner.go
index bf2110bc..6ce37ef6 100644
--- a/discover/runner.go
+++ b/discover/runner.go
@@ -65,6 +65,10 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 		}
 
 		slog.Info("discovering available GPUs...")
+
+		// Warn if any user-overrides are set which could lead to incorrect GPU discovery
+		overrideWarnings()
+
 		requested := envconfig.LLMLibrary()
 		jetpack := cudaJetpack()
 
@@ -449,3 +453,24 @@ func bootstrapDevices(ctx context.Context, ollamaLibDirs []string, extraEnvs map
 
 	return devices
 }
+
+func overrideWarnings() {
+	anyFound := false
+	m := envconfig.AsMap()
+	for _, k := range []string{
+		"CUDA_VISIBLE_DEVICES",
+		"HIP_VISIBLE_DEVICES",
+		"ROCR_VISIBLE_DEVICES",
+		"GGML_VK_VISIBLE_DEVICES",
+		"GPU_DEVICE_ORDINAL",
+		"HSA_OVERRIDE_GFX_VERSION",
+	} {
+		if e, found := m[k]; found && e.Value != "" {
+			anyFound = true
+			slog.Warn("user override visible devices", k, e.Value)
+		}
+	}
+	if anyFound {
+		slog.Warn("if GPUs are not correctly discovered, unset and try again")
+	}
+}