mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 22:33:56 +00:00
DRY out the runner lifecycle code (#12540)
* DRY out the runner lifecycle code

  Now that discovery uses the runners as well, this unifies the runner spawning code into a single place. This also unifies GPU discovery types with the newer ml.DeviceInfo.

* win: make incremental builds better

  Place build artifacts in discrete directories so incremental builds don't have to start fresh.

* Adjust sort order to consider iGPUs
* handle CPU inference OOM scenarios
* review comments
This commit is contained in:
164
discover/gpu.go
164
discover/gpu.go
@@ -1,16 +1,13 @@
|
||||
package discover
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/ml"
|
||||
)
|
||||
|
||||
@@ -18,159 +15,28 @@ import (
|
||||
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
||||
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||
|
||||
func GetCPUInfo() GpuInfo {
|
||||
mem, err := GetCPUMem()
|
||||
// GetSystemInfo returns the last cached state of the GPUs on the system
|
||||
func GetSystemInfo() ml.SystemInfo {
|
||||
memInfo, err := GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Warn("error looking up system memory", "error", err)
|
||||
}
|
||||
|
||||
return GpuInfo{
|
||||
memInfo: mem,
|
||||
DeviceID: ml.DeviceID{
|
||||
Library: "cpu",
|
||||
ID: "0",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func GetGPUInfo(ctx context.Context, runners []FilteredRunnerDiscovery) GpuInfoList {
|
||||
devs := GPUDevices(ctx, runners)
|
||||
return devInfoToInfoList(devs)
|
||||
}
|
||||
|
||||
func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
|
||||
resp := []GpuInfo{}
|
||||
// Our current packaging model places ggml-hip in the main directory
|
||||
// but keeps rocm in an isolated directory. We have to add it to
|
||||
// the [LD_LIBRARY_]PATH so ggml-hip will load properly
|
||||
rocmDir := filepath.Join(LibOllamaPath, "rocm")
|
||||
if _, err := os.Stat(rocmDir); err != nil {
|
||||
rocmDir = ""
|
||||
var threadCount int
|
||||
cpus := GetCPUDetails()
|
||||
for _, c := range cpus {
|
||||
threadCount += c.CoreCount - c.EfficiencyCoreCount
|
||||
}
|
||||
|
||||
for _, dev := range devs {
|
||||
info := GpuInfo{
|
||||
DeviceID: dev.DeviceID,
|
||||
filterID: dev.FilteredID,
|
||||
Name: dev.Description,
|
||||
memInfo: memInfo{
|
||||
TotalMemory: dev.TotalMemory,
|
||||
FreeMemory: dev.FreeMemory,
|
||||
},
|
||||
// TODO can we avoid variant
|
||||
DependencyPath: dev.LibraryPath,
|
||||
DriverMajor: dev.DriverMajor,
|
||||
DriverMinor: dev.DriverMinor,
|
||||
ComputeMajor: dev.ComputeMajor,
|
||||
ComputeMinor: dev.ComputeMinor,
|
||||
}
|
||||
if dev.Library == "CUDA" || dev.Library == "ROCm" {
|
||||
info.MinimumMemory = 457 * format.MebiByte
|
||||
}
|
||||
if dev.Library == "ROCm" && rocmDir != "" {
|
||||
info.DependencyPath = append(info.DependencyPath, rocmDir)
|
||||
}
|
||||
// TODO any special processing of Vulkan devices?
|
||||
resp = append(resp, info)
|
||||
}
|
||||
if len(resp) == 0 {
|
||||
mem, err := GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Warn("error looking up system memory", "error", err)
|
||||
}
|
||||
|
||||
resp = append(resp, GpuInfo{
|
||||
memInfo: mem,
|
||||
DeviceID: ml.DeviceID{
|
||||
Library: "cpu",
|
||||
ID: "0",
|
||||
},
|
||||
})
|
||||
}
|
||||
return resp
|
||||
}
|
||||
|
||||
// Given the list of GPUs this instantiation is targeted for,
|
||||
// figure out the visible devices environment variable
|
||||
//
|
||||
// If different libraries are detected, the first one is what we use
|
||||
func (l GpuInfoList) GetVisibleDevicesEnv() []string {
|
||||
if len(l) == 0 {
|
||||
return nil
|
||||
}
|
||||
res := []string{}
|
||||
envVar := rocmGetVisibleDevicesEnv(l)
|
||||
if envVar != "" {
|
||||
res = append(res, envVar)
|
||||
}
|
||||
envVar = vkGetVisibleDevicesEnv(l)
|
||||
if envVar != "" {
|
||||
res = append(res, envVar)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
|
||||
ids := []string{}
|
||||
for _, info := range gpuInfo {
|
||||
if info.Library != "ROCm" {
|
||||
continue
|
||||
}
|
||||
// If the devices requires a numeric ID, for filtering purposes, we use the unfiltered ID number
|
||||
if info.filterID != "" {
|
||||
ids = append(ids, info.filterID)
|
||||
} else {
|
||||
ids = append(ids, info.ID)
|
||||
}
|
||||
}
|
||||
if len(ids) == 0 {
|
||||
return ""
|
||||
}
|
||||
envVar := "ROCR_VISIBLE_DEVICES="
|
||||
if runtime.GOOS != "linux" {
|
||||
envVar = "HIP_VISIBLE_DEVICES="
|
||||
}
|
||||
// There are 3 potential env vars to use to select GPUs.
|
||||
// ROCR_VISIBLE_DEVICES supports UUID or numeric but does not work on Windows
|
||||
// HIP_VISIBLE_DEVICES supports numeric IDs only
|
||||
// GPU_DEVICE_ORDINAL supports numeric IDs only
|
||||
return envVar + strings.Join(ids, ",")
|
||||
}
|
||||
|
||||
func vkGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
|
||||
ids := []string{}
|
||||
for _, info := range gpuInfo {
|
||||
if info.Library != "Vulkan" {
|
||||
continue
|
||||
}
|
||||
if info.filterID != "" {
|
||||
ids = append(ids, info.filterID)
|
||||
} else {
|
||||
ids = append(ids, info.ID)
|
||||
}
|
||||
}
|
||||
if len(ids) == 0 {
|
||||
return ""
|
||||
}
|
||||
envVar := "GGML_VK_VISIBLE_DEVICES="
|
||||
return envVar + strings.Join(ids, ",")
|
||||
}
|
||||
|
||||
// GetSystemInfo returns the last cached state of the GPUs on the system
|
||||
func GetSystemInfo() SystemInfo {
|
||||
deviceMu.Lock()
|
||||
defer deviceMu.Unlock()
|
||||
gpus := devInfoToInfoList(devices)
|
||||
if len(gpus) == 1 && gpus[0].Library == "cpu" {
|
||||
gpus = []GpuInfo{}
|
||||
if threadCount == 0 {
|
||||
// Fall back to Go's num CPU
|
||||
threadCount = runtime.NumCPU()
|
||||
}
|
||||
|
||||
return SystemInfo{
|
||||
System: CPUInfo{
|
||||
CPUs: GetCPUDetails(),
|
||||
GpuInfo: GetCPUInfo(),
|
||||
},
|
||||
GPUs: gpus,
|
||||
return ml.SystemInfo{
|
||||
ThreadCount: threadCount,
|
||||
TotalMemory: memInfo.TotalMemory,
|
||||
FreeMemory: memInfo.FreeMemory,
|
||||
FreeSwap: memInfo.FreeSwap,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user