mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 14:26:30 +00:00
discovery: only retry AMD GPUs (#12894)
* discovery: only retry AMD GPUs CUDA and Vulkan don't crash on unsupported devices, so retry isn't necessary. This also refactors the code to shift the Library specific logic into the ml package. * review comments
This commit is contained in:
@@ -27,7 +27,6 @@ var (
|
||||
deviceMu sync.Mutex
|
||||
devices []ml.DeviceInfo
|
||||
libDirs map[string]struct{}
|
||||
rocmDir string
|
||||
exe string
|
||||
bootstrapped bool
|
||||
)
|
||||
@@ -61,14 +60,6 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
libDirs[filepath.Dir(file)] = struct{}{}
|
||||
}
|
||||
|
||||
// Our current packaging model places ggml-hip in the main directory
|
||||
// but keeps rocm in an isolated directory. We have to add it to
|
||||
// the [LD_LIBRARY_]PATH so ggml-hip will load properly
|
||||
rocmDir = filepath.Join(ml.LibOllamaPath, "rocm")
|
||||
if _, err := os.Stat(rocmDir); err != nil {
|
||||
rocmDir = ""
|
||||
}
|
||||
|
||||
if len(libDirs) == 0 {
|
||||
libDirs[""] = struct{}{}
|
||||
}
|
||||
@@ -82,9 +73,20 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
// are enumerated, but not actually supported.
|
||||
// We run this in serial to avoid potentially initializing a GPU multiple
|
||||
// times concurrently leading to memory contention
|
||||
// TODO refactor so we group the lib dirs and do serial per version, but parallel for different libs
|
||||
for dir := range libDirs {
|
||||
// Typically bootstrapping takes < 1s, but on some systems, with devices
|
||||
// in low power/idle mode, initialization can take multiple seconds. We
|
||||
// set a longer timeout just for bootstrap discovery to reduce the chance
|
||||
// of giving up too quickly
|
||||
bootstrapTimeout := 30 * time.Second
|
||||
if runtime.GOOS == "windows" {
|
||||
// On Windows with Defender enabled, AV scanning of the DLLs
|
||||
// takes place sequentially and this can significantly increase
|
||||
// the time it takes too do the initial discovery pass.
|
||||
// Subsequent loads will be faster as the scan results are
|
||||
// cached
|
||||
bootstrapTimeout = 90 * time.Second
|
||||
}
|
||||
var dirs []string
|
||||
if dir != "" {
|
||||
if requested != "" && filepath.Base(dir) != requested {
|
||||
@@ -93,21 +95,11 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if dir == "" {
|
||||
dirs = []string{ml.LibOllamaPath}
|
||||
} else {
|
||||
dirs = []string{ml.LibOllamaPath, dir}
|
||||
} else {
|
||||
dirs = []string{ml.LibOllamaPath}
|
||||
}
|
||||
|
||||
// ROCm can take a long time on some systems, so give it more time before giving up
|
||||
if dir != "" && strings.Contains(filepath.Base(dir), "rocm") {
|
||||
bootstrapTimeout = 60 * time.Second
|
||||
}
|
||||
// Typically bootstrapping takes < 1s, but on some systems, with devices
|
||||
// in low power/idle mode, initialization can take multiple seconds. We
|
||||
// set a long timeout just for bootstrap discovery to reduce the chance
|
||||
// of giving up too quickly
|
||||
ctx1stPass, cancel := context.WithTimeout(ctx, bootstrapTimeout)
|
||||
defer cancel()
|
||||
|
||||
@@ -117,6 +109,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
|
||||
// In the second pass, we more deeply initialize the GPUs to weed out devices that
|
||||
// aren't supported by a given library. We run this phase in parallel to speed up discovery.
|
||||
// Only devices that need verification are included in this pass
|
||||
slog.Debug("evluating which if any devices to filter out", "initial_count", len(devices))
|
||||
ctx2ndPass, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
@@ -125,35 +118,16 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
supportedMu := sync.Mutex{}
|
||||
supported := make(map[string]map[string]map[string]int) // [Library][libDir][ID] = pre-deletion devices index
|
||||
for i := range devices {
|
||||
libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
|
||||
if devices[i].Library == "Metal" {
|
||||
if !devices[i].NeedsInitValidation() {
|
||||
continue
|
||||
}
|
||||
slog.Debug("verifying GPU is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
|
||||
libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
|
||||
slog.Debug("verifying device is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
var envVar string
|
||||
id := devices[i].ID
|
||||
if devices[i].Library == "ROCm" {
|
||||
if runtime.GOOS != "linux" {
|
||||
envVar = "HIP_VISIBLE_DEVICES"
|
||||
} else {
|
||||
envVar = "ROCR_VISIBLE_DEVICES"
|
||||
}
|
||||
} else if devices[i].Library == "CUDA" {
|
||||
envVar = "CUDA_VISIBLE_DEVICES"
|
||||
} else if devices[i].Library == "Vulkan" {
|
||||
id = devices[i].FilteredID
|
||||
envVar = "GGML_VK_VISIBLE_DEVICES"
|
||||
} else {
|
||||
slog.Error("Unknown Library:" + devices[i].Library)
|
||||
}
|
||||
|
||||
extraEnvs := map[string]string{
|
||||
"GGML_CUDA_INIT": "1", // force deep initialization to trigger crash on unsupported GPUs
|
||||
envVar: id, // Filter to just this one GPU
|
||||
}
|
||||
extraEnvs := ml.GetVisibleDevicesEnv(devices[i : i+1])
|
||||
devices[i].AddInitValidation(extraEnvs)
|
||||
if len(bootstrapDevices(ctx2ndPass, devices[i].LibraryPath, extraEnvs)) == 0 {
|
||||
slog.Debug("filtering device which didn't fully initialize",
|
||||
"id", devices[i].ID,
|
||||
@@ -178,26 +152,28 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
wg.Wait()
|
||||
logutil.Trace("supported GPU library combinations before filtering", "supported", supported)
|
||||
|
||||
filterOutVulkanThatAreSupportedByOtherGPU(needsDelete)
|
||||
|
||||
// Mark for deletion any overlaps - favoring the library version that can cover all GPUs if possible
|
||||
filterOverlapByLibrary(supported, needsDelete)
|
||||
|
||||
// TODO if we ever support multiple ROCm library versions this algorithm will need to be adjusted to keep the rocmID numeric value correct
|
||||
rocmID := 0
|
||||
// Any Libraries that utilize numeric IDs need adjusting based on any possible filtering taking place
|
||||
postFilteredID := map[string]int{}
|
||||
for i := 0; i < len(needsDelete); i++ {
|
||||
if needsDelete[i] {
|
||||
logutil.Trace("removing unsupported or overlapping GPU combination", "libDir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1], "description", devices[i].Description, "compute", devices[i].Compute(), "pci_id", devices[i].PCIID)
|
||||
devices = append(devices[:i], devices[i+1:]...)
|
||||
needsDelete = append(needsDelete[:i], needsDelete[i+1:]...)
|
||||
i--
|
||||
} else if devices[i].Library == "ROCm" {
|
||||
} else {
|
||||
if _, ok := postFilteredID[devices[i].Library]; !ok {
|
||||
postFilteredID[devices[i].Library] = 0
|
||||
}
|
||||
if _, err := strconv.Atoi(devices[i].ID); err == nil {
|
||||
// Replace the numeric ID with the post-filtered IDs
|
||||
devices[i].FilteredID = devices[i].ID
|
||||
devices[i].ID = strconv.Itoa(rocmID)
|
||||
slog.Debug("adjusting filtering IDs", "FilterID", devices[i].ID, "new_ID", strconv.Itoa(postFilteredID[devices[i].Library]))
|
||||
devices[i].FilterID = devices[i].ID
|
||||
devices[i].ID = strconv.Itoa(postFilteredID[devices[i].Library])
|
||||
}
|
||||
rocmID++
|
||||
postFilteredID[devices[i].Library]++
|
||||
}
|
||||
}
|
||||
|
||||
@@ -214,7 +190,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
case ml.DuplicateDevice:
|
||||
// Different library, choose based on priority
|
||||
var droppedDevice ml.DeviceInfo
|
||||
if devices[i].Library == "CUDA" || devices[i].Library == "ROCm" {
|
||||
if devices[i].PreferredLibrary(devices[j]) {
|
||||
droppedDevice = devices[j]
|
||||
} else {
|
||||
droppedDevice = devices[i]
|
||||
@@ -363,38 +339,6 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
return devices
|
||||
}
|
||||
|
||||
func filterOutVulkanThatAreSupportedByOtherGPU(needsDelete []bool) {
|
||||
// Filter out Vulkan devices that share a PCI ID with a non-Vulkan device that is not marked for deletion
|
||||
for i := range devices {
|
||||
if devices[i].Library != "Vulkan" || needsDelete[i] {
|
||||
continue
|
||||
}
|
||||
if devices[i].PCIID == "" {
|
||||
continue
|
||||
}
|
||||
for j := range devices {
|
||||
if i == j {
|
||||
continue
|
||||
}
|
||||
if devices[j].PCIID == "" {
|
||||
continue
|
||||
}
|
||||
if devices[j].PCIID == devices[i].PCIID && devices[j].Library != "Vulkan" && !needsDelete[j] {
|
||||
needsDelete[i] = true
|
||||
slog.Debug("filtering device with duplicate PCI ID",
|
||||
"id", devices[i].ID,
|
||||
"library", devices[i].Library,
|
||||
"libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
|
||||
"pci_id", devices[i].PCIID,
|
||||
"kept_id", devices[j].ID,
|
||||
"kept_library", devices[j].Library,
|
||||
)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func filterOverlapByLibrary(supported map[string]map[string]map[string]int, needsDelete []bool) {
|
||||
// For multi-GPU systems, use the newest version that supports all the GPUs
|
||||
for _, byLibDirs := range supported {
|
||||
|
||||
@@ -41,7 +41,7 @@ func LogDetails(devices []ml.DeviceInfo) {
|
||||
}
|
||||
slog.Info("inference compute",
|
||||
"id", dev.ID,
|
||||
"filtered_id", dev.FilteredID,
|
||||
"filter_id", dev.FilterID,
|
||||
"library", dev.Library,
|
||||
"compute", dev.Compute(),
|
||||
"name", dev.Name,
|
||||
|
||||
Reference in New Issue
Block a user