ggml: Remove allocation status reporting

For each memory allocation we report the size of the (attempted)
allocation and whether it succeeded or failed. The latter status
reporting proved to be not that useful in practice as systems
such as Windows can automatically overflow from VRAM into RAM,
resultings in successful allocations even when there isn't
enough memory where we wanted.

As a result, this information is only used for debug logging,
which isn't worthwhile enough for the amount of code. It
also isn't fully accurate, as multiple allocations may result
in partial failures.
This commit is contained in:
Jesse Gross
2025-09-22 17:27:03 -07:00
committed by Jesse Gross
parent 0469861d9d
commit 734b57da0e
9 changed files with 86 additions and 201 deletions

View File

@@ -158,40 +158,6 @@ func (e ErrNoMem) Error() string {
return fmt.Sprintf("insufficient memory - required allocations: %+v", e.BackendMemory)
}
type AllocationStatus int
const (
// Unallocated memory - have not yet attempted to allocate
Unallocated AllocationStatus = iota
// Failed memory - tried to allocate the memory and did not succeed
Failed
// Allocated memory = tried and succeeded to allocate memory
Allocated
)
// Memory is the size of an allocation and whether it was successful.
type Memory struct {
Size uint64
Status AllocationStatus
}
func (m Memory) String() string {
s := fmt.Sprint(m.Size)
switch m.Status {
case Unallocated:
s += "U"
case Failed:
s += "F"
case Allocated:
s += "A"
}
return s
}
// DeviceMemory provides a breakdown of the memory needed
// per device, such as a CPU or GPU.
type DeviceMemory struct {
@@ -204,39 +170,32 @@ type DeviceMemory struct {
ID string
// Weights is the per-layer memory needed for the model weights.
Weights []Memory
Weights []uint64
// Cache is the per-layer memory needed for the KV cache.
Cache []Memory
Cache []uint64
// Graph is the size of the compute graph. It is not per-layer.
Graph Memory
Graph uint64
}
// Allocated returns the total size of the memory that has been successfully
// allocated on this device
func (m DeviceMemory) Allocated() uint64 {
var mem uint64
func sumMemory(mem []uint64) uint64 {
var sum uint64
for _, w := range m.Weights {
if w.Status == Allocated {
mem += w.Size
}
}
for _, c := range m.Cache {
if c.Status == Allocated {
mem += c.Size
}
}
if m.Graph.Status == Allocated {
mem += m.Graph.Size
for _, m := range mem {
sum += m
}
return mem
return sum
}
func memoryPresent(mem []Memory) bool {
return slices.ContainsFunc(mem, func(m Memory) bool { return m.Size != 0 })
// Size returns the total size of the memory required by this device
func (m DeviceMemory) Size() uint64 {
return sumMemory(m.Weights) + sumMemory(m.Cache) + m.Graph
}
func memoryPresent(mem []uint64) bool {
return slices.ContainsFunc(mem, func(m uint64) bool { return m != 0 })
}
func (m DeviceMemory) LogValue() slog.Value {
@@ -249,7 +208,7 @@ func (m DeviceMemory) LogValue() slog.Value {
attrs = append(attrs, slog.Any("Cache", m.Cache))
}
if m.Graph.Size != 0 {
if m.Graph != 0 {
attrs = append(attrs, slog.Any("Graph", m.Graph))
}
@@ -267,7 +226,7 @@ func (m DeviceMemory) LogValue() slog.Value {
// accommodate that to make forward progress.
type BackendMemory struct {
// InputWeights are always located on the CPU and cannot be moved
InputWeights Memory
InputWeights uint64
// CPU model components are located in system memory. This does not
// include unified memory allocated through the GPU.
@@ -279,7 +238,7 @@ type BackendMemory struct {
func (m BackendMemory) LogValue() slog.Value {
var attrs []slog.Attr
if m.InputWeights.Size != 0 {
if m.InputWeights != 0 {
attrs = append(attrs, slog.Any("InputWeights", m.InputWeights))
}
@@ -291,17 +250,7 @@ func (m BackendMemory) LogValue() slog.Value {
return slog.GroupValue(attrs...)
}
func sumMemory(mem []Memory) uint64 {
var sum uint64
for _, m := range mem {
sum += m.Size
}
return sum
}
// Log prints a high level summary of the memory (allocated or not)
// Log prints a high level summary of the memory
func (m BackendMemory) Log(level slog.Level) {
var total uint64
@@ -311,7 +260,7 @@ func (m BackendMemory) Log(level slog.Level) {
total += sum
}
}
if sum := m.InputWeights.Size + sumMemory(m.CPU.Weights); sum > 0 {
if sum := m.InputWeights + sumMemory(m.CPU.Weights); sum > 0 {
slog.Log(context.TODO(), level, "model weights", "device", m.CPU.Name, "size", format.HumanBytes2(sum))
total += sum
}
@@ -328,12 +277,12 @@ func (m BackendMemory) Log(level slog.Level) {
}
for _, gpu := range m.GPUs {
if sum := gpu.Graph.Size; sum > 0 {
if sum := gpu.Graph; sum > 0 {
slog.Log(context.TODO(), level, "compute graph", "device", gpu.Name, "size", format.HumanBytes2(sum))
total += sum
}
}
if sum := m.CPU.Graph.Size; sum > 0 {
if sum := m.CPU.Graph; sum > 0 {
slog.Log(context.TODO(), level, "compute graph", "device", m.CPU.Name, "size", format.HumanBytes2(sum))
total += sum
}