Bring back escape valve for llm libraries and fix Jetpack6 crash (#12529)

* Bring back escape valve for llm libraries If the new discovery logic picks the wrong library, this gives users the ability to force a specific one using the same pattern as before. This can also potentially speed up bootstrap discovery if one of the libraries takes a long time to load and ultimately bind to no devices. For example unsupported AMD iGPUS can sometimes take a while to discover and rule out. * Bypass extra discovery on jetpack systems On at least Jetpack6, cuda_v12 appears to expose the iGPU, but crashes later on in cublasInit so if we detect a Jetpack, short-circuit and use that variant.
2025-12-21 22:33:56 +00:00 · 2025-10-07 16:06:14 -07:00
parent bc71278670
commit bd15eba4e4
3 changed files with 46 additions and 8 deletions
--- a/discover/runner.go
+++ b/discover/runner.go
@@ -78,6 +78,8 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 		}

 		slog.Info("discovering available GPUs...")
+		requested := envconfig.LLMLibrary()
+		jetpack := cudaJetpack()

 		// For our initial discovery pass, we gather all the known GPUs through
 		// all the libraries that were detected. This pass may include GPUs that
@@ -86,6 +88,14 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 		// times concurrently leading to memory contention
 		for dir := range libDirs {
 			var dirs []string
+			if dir != "" {
+				if requested != "" && filepath.Base(dir) != requested {
+					slog.Debug("skipping available library at users request", "requested", requested, "libDir", dir)
+					continue
+				} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
+					continue
+				}
+			}
 			if dir == "" {
 				dirs = []string{LibOllamaPath}
 			} else {