diff --git a/discover/gpu.go b/discover/gpu.go
index f4d002f2..15d1e79f 100644
--- a/discover/gpu.go
+++ b/discover/gpu.go
@@ -2,7 +2,6 @@ package discover
 
 import (
 	"context"
-	"fmt"
 	"log/slog"
 	"os"
 	"path/filepath"
@@ -62,17 +61,14 @@ func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
 			DependencyPath: dev.LibraryPath,
 			DriverMajor:    dev.DriverMajor,
 			DriverMinor:    dev.DriverMinor,
+			ComputeMajor:   dev.ComputeMajor,
+			ComputeMinor:   dev.ComputeMinor,
 		}
 		if dev.Library == "CUDA" || dev.Library == "ROCm" {
 			info.MinimumMemory = 457 * format.MebiByte
 		}
-		if dev.Library == "ROCm" {
-			info.Compute = fmt.Sprintf("gfx%x%02x", dev.ComputeMajor, dev.ComputeMinor)
-			if rocmDir != "" {
-				info.DependencyPath = append(info.DependencyPath, rocmDir)
-			}
-		} else {
-			info.Compute = fmt.Sprintf("%d.%d", dev.ComputeMajor, dev.ComputeMinor)
-		}
+		if dev.Library == "ROCm" && rocmDir != "" {
+			info.DependencyPath = append(info.DependencyPath, rocmDir)
+		}
 		resp = append(resp, info)
 	}
diff --git a/discover/types.go b/discover/types.go
index 718809f4..f0c3989d 100644
--- a/discover/types.go
+++ b/discover/types.go
@@ -37,9 +37,10 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
 	UnreliableFreeMemory bool
 
 	// GPU information
-	filterID string // AMD Workaround: The numeric ID of the device used to filter out other devices
-	Name     string `json:"name"`    // user friendly name if available
-	Compute  string `json:"compute"` // Compute Capability or gfx
+	filterID     string // AMD Workaround: The numeric ID of the device used to filter out other devices
+	Name         string `json:"name"`          // user friendly name if available
+	ComputeMajor int    `json:"compute_major"` // Compute Capability or gfx
+	ComputeMinor int    `json:"compute_minor"`
 
 	// Driver Information - TODO no need to put this on each GPU
 	DriverMajor int `json:"driver_major,omitempty"`
@@ -173,7 +174,7 @@ func (l GpuInfoList) FlashAttentionSupported() bool {
 	for _, gpu := range l {
 		supportsFA := gpu.Library == "cpu" ||
 			gpu.Name == "Metal" || gpu.Library == "Metal" ||
-			(gpu.Library == "CUDA" && gpu.DriverMajor >= 7) ||
+			(gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) || // We don't have kernels for Jetson Xavier
 			gpu.Library == "ROCm"
 
 		if !supportsFA {
diff --git a/llm/memory.go b/llm/memory.go
index 4a54b331..aa4927f1 100644
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -266,11 +266,18 @@ func estimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 		}
 		// Only include GPUs that can fit the graph, gpu minimum, the layer buffer and at least more layer
 		if gpus[i].FreeMemory < overhead+gzo+max(graphPartialOffload, graphFullOffload)+gpus[i].MinimumMemory+2*layerSize {
+			var compute string
+			if gpus[i].Library == "ROCm" {
+				compute = fmt.Sprintf("gfx%x%02x", gpus[i].ComputeMajor, gpus[i].ComputeMinor)
+			} else {
+				compute = fmt.Sprintf("%d.%d", gpus[i].ComputeMajor, gpus[i].ComputeMinor)
+			}
+
 			slog.Debug("gpu has too little memory to allocate any layers",
 				"id", gpus[i].ID,
 				"library", gpus[i].Library,
 				"variant", gpus[i].Variant,
-				"compute", gpus[i].Compute,
+				"compute", compute,
 				"driver", fmt.Sprintf("%d.%d", gpus[i].DriverMajor, gpus[i].DriverMinor),
 				"name", gpus[i].Name,
 				"total", format.HumanBytes2(gpus[i].TotalMemory),
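
For reference, a minimal sketch of the compute-capability formatting that this diff inlines in llm/memory.go now that GpuInfo no longer carries a pre-formatted Compute string. The format strings are taken verbatim from the diff; the `formatCompute` helper name is hypothetical and not part of the change.

```go
package main

import "fmt"

// formatCompute mirrors the formatting the diff inlines in llm/memory.go:
// ROCm devices render as a gfx target name, everything else as "major.minor".
// Illustrative only; the real code does this inline at the slog.Debug call.
func formatCompute(library string, computeMajor, computeMinor int) string {
	if library == "ROCm" {
		return fmt.Sprintf("gfx%x%02x", computeMajor, computeMinor)
	}
	return fmt.Sprintf("%d.%d", computeMajor, computeMinor)
}

func main() {
	fmt.Println(formatCompute("ROCm", 0x10, 0x30)) // gfx1030
	fmt.Println(formatCompute("CUDA", 7, 2))       // 7.2 (Jetson Xavier, now excluded from flash attention)
}
```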